Actual source code: bvimpl.h
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
11: #pragma once
13: #include <slepcbv.h>
14: #include <slepc/private/slepcimpl.h>
16: /* SUBMANSEC = BV */
18: SLEPC_EXTERN PetscBool BVRegisterAllCalled;
19: SLEPC_EXTERN PetscErrorCode BVRegisterAll(void);
21: SLEPC_EXTERN PetscLogEvent BV_Create,BV_Copy,BV_Mult,BV_MultVec,BV_MultInPlace,BV_Dot,BV_DotVec,BV_Orthogonalize,BV_OrthogonalizeVec,BV_Scale,BV_Norm,BV_NormVec,BV_Normalize,BV_SetRandom,BV_MatMult,BV_MatMultVec,BV_MatProject,BV_SVDAndRank;
23: typedef struct _BVOps *BVOps;
25: struct _BVOps {
26: PetscErrorCode (*mult)(BV,PetscScalar,PetscScalar,BV,Mat);
27: PetscErrorCode (*multvec)(BV,PetscScalar,PetscScalar,Vec,PetscScalar*);
28: PetscErrorCode (*multinplace)(BV,Mat,PetscInt,PetscInt);
29: PetscErrorCode (*multinplacetrans)(BV,Mat,PetscInt,PetscInt);
30: PetscErrorCode (*dot)(BV,BV,Mat);
31: PetscErrorCode (*dotvec)(BV,Vec,PetscScalar*);
32: PetscErrorCode (*dotvec_local)(BV,Vec,PetscScalar*);
33: PetscErrorCode (*dotvec_begin)(BV,Vec,PetscScalar*);
34: PetscErrorCode (*dotvec_end)(BV,Vec,PetscScalar*);
35: PetscErrorCode (*scale)(BV,PetscInt,PetscScalar);
36: PetscErrorCode (*norm)(BV,PetscInt,NormType,PetscReal*);
37: PetscErrorCode (*norm_local)(BV,PetscInt,NormType,PetscReal*);
38: PetscErrorCode (*norm_begin)(BV,PetscInt,NormType,PetscReal*);
39: PetscErrorCode (*norm_end)(BV,PetscInt,NormType,PetscReal*);
40: PetscErrorCode (*normalize)(BV,PetscScalar*);
41: PetscErrorCode (*matmult)(BV,Mat,BV);
42: PetscErrorCode (*copy)(BV,BV);
43: PetscErrorCode (*copycolumn)(BV,PetscInt,PetscInt);
44: PetscErrorCode (*resize)(BV,PetscInt,PetscBool);
45: PetscErrorCode (*getcolumn)(BV,PetscInt,Vec*);
46: PetscErrorCode (*restorecolumn)(BV,PetscInt,Vec*);
47: PetscErrorCode (*getarray)(BV,PetscScalar**);
48: PetscErrorCode (*restorearray)(BV,PetscScalar**);
49: PetscErrorCode (*getarrayread)(BV,const PetscScalar**);
50: PetscErrorCode (*restorearrayread)(BV,const PetscScalar**);
51: PetscErrorCode (*restoresplit)(BV,BV*,BV*);
52: PetscErrorCode (*restoresplitrows)(BV,IS,IS,BV*,BV*);
53: PetscErrorCode (*gramschmidt)(BV,PetscInt,Vec,PetscBool*,PetscScalar*,PetscScalar*,PetscReal*,PetscReal*);
54: PetscErrorCode (*getmat)(BV,Mat*);
55: PetscErrorCode (*restoremat)(BV,Mat*);
56: PetscErrorCode (*duplicate)(BV,BV);
57: PetscErrorCode (*create)(BV);
58: PetscErrorCode (*setfromoptions)(BV,PetscOptionItems);
59: PetscErrorCode (*view)(BV,PetscViewer);
60: PetscErrorCode (*destroy)(BV);
61: };
63: struct _p_BV {
64: PETSCHEADER(struct _BVOps);
65: /*------------------------- User parameters --------------------------*/
66: PetscLayout map; /* layout of columns */
67: VecType vtype; /* vector type */
68: PetscInt n,N; /* dimensions of vectors (local, global) */
69: PetscInt m; /* number of vectors */
70: PetscInt l; /* number of leading columns */
71: PetscInt k; /* number of active columns */
72: PetscInt nc; /* number of constraints */
73: PetscInt ld; /* leading dimension */
74: BVOrthogType orthog_type; /* the method of vector orthogonalization */
75: BVOrthogRefineType orthog_ref; /* refinement method */
76: PetscReal orthog_eta; /* refinement threshold */
77: BVOrthogBlockType orthog_block; /* the method of block orthogonalization */
78: Mat matrix; /* inner product matrix */
79: PetscBool indef; /* matrix is indefinite */
80: BVMatMultType vmm; /* version of matmult operation */
81: PetscBool rrandom; /* reproducible random vectors */
82: PetscReal deftol; /* tolerance for BV_SafeSqrt */
84: /*---------------------- Cached data and workspace -------------------*/
85: Vec buffer; /* buffer vector used in orthogonalization */
86: Mat Abuffer; /* auxiliary seqdense matrix that wraps the buffer */
87: Vec Bx; /* result of matrix times a vector x */
88: PetscObjectId xid; /* object id of vector x */
89: PetscObjectState xstate; /* state of vector x */
90: Vec cv[2]; /* column vectors obtained with BVGetColumn() */
91: PetscInt ci[2]; /* column indices of obtained vectors */
92: PetscObjectState st[2]; /* state of obtained vectors */
93: PetscObjectId id[2]; /* object id of obtained vectors */
94: PetscScalar *h,*c; /* orthogonalization coefficients */
95: Vec omega; /* signature matrix values for indefinite case */
96: PetscBool defersfo; /* deferred call to setfromoptions */
97: BV cached; /* cached BV to store result of matrix times BV */
98: PetscObjectState bvstate; /* state of BV when BVApplyMatrixBV() was called */
99: BV L,R; /* BV objects obtained with BVGetSplit/Rows() */
100: PetscObjectState lstate,rstate;/* state of L and R when BVGetSplit/Rows() was called */
101: PetscInt lsplit; /* value of l when BVGetSplit() was called (-1 if BVGetSplitRows()) */
102: PetscInt issplit; /* !=0 if BV is from split (1=left, 2=right, -1=top, -2=bottom) */
103: BV splitparent; /* my parent if I am a split BV */
104: PetscRandom rand; /* random number generator */
105: Mat Acreate; /* matrix given at BVCreateFromMat() */
106: Mat Aget; /* matrix returned for BVGetMat() */
107: PetscBool cuda; /* true if NVIDIA GPU must be used */
108: PetscBool hip; /* true if AMD GPU must be used */
109: PetscBool sfocalled; /* setfromoptions has been called */
110: PetscScalar *work;
111: PetscInt lwork;
112: void *data;
113: };
115: /*
116: BV_SafeSqrt - Computes the square root of a scalar value alpha, which is
117: assumed to be z'*B*z. The result is
118: if definite inner product: res = sqrt(alpha)
119: if indefinite inner product: res = sgn(alpha)*sqrt(abs(alpha))
120: */
121: static inline PetscErrorCode BV_SafeSqrt(BV bv,PetscScalar alpha,PetscReal *res)
122: {
123: PetscReal absal,realp;
124: const char *msg;
126: PetscFunctionBegin;
127: absal = PetscAbsScalar(alpha);
128: realp = PetscRealPart(alpha);
129: if (PetscUnlikely(absal<PETSC_MACHINE_EPSILON)) PetscCall(PetscInfo(bv,"Zero norm %g, either the vector is zero or a semi-inner product is being used\n",(double)absal));
130: #if defined(PETSC_USE_COMPLEX)
131: PetscCheck(PetscAbsReal(PetscImaginaryPart(alpha))<bv->deftol || PetscAbsReal(PetscImaginaryPart(alpha))/absal<10*bv->deftol,PetscObjectComm((PetscObject)bv),PETSC_ERR_USER_INPUT,"The inner product is not well defined: nonzero imaginary part %g",(double)PetscImaginaryPart(alpha));
132: #endif
133: if (PetscUnlikely(bv->indef)) {
134: *res = (realp<0.0)? -PetscSqrtReal(-realp): PetscSqrtReal(realp);
135: } else {
136: msg = bv->matrix? "The inner product is not well defined: indefinite matrix %g": "Invalid inner product: %g";
137: PetscCheck(realp>-bv->deftol,PetscObjectComm((PetscObject)bv),PETSC_ERR_USER_INPUT,msg,(double)realp);
138: *res = (realp<0.0)? 0.0: PetscSqrtReal(realp);
139: }
140: PetscFunctionReturn(PETSC_SUCCESS);
141: }
143: /*
144: BV_IPMatMult - Multiply a vector x by the inner-product matrix, cache the
145: result in Bx.
146: */
147: static inline PetscErrorCode BV_IPMatMult(BV bv,Vec x)
148: {
149: PetscFunctionBegin;
150: if (((PetscObject)x)->id != bv->xid || ((PetscObject)x)->state != bv->xstate) {
151: if (PetscUnlikely(!bv->Bx)) PetscCall(MatCreateVecs(bv->matrix,&bv->Bx,NULL));
152: PetscCall(MatMult(bv->matrix,x,bv->Bx));
153: PetscCall(PetscObjectGetId((PetscObject)x,&bv->xid));
154: PetscCall(VecGetState(x,&bv->xstate));
155: }
156: PetscFunctionReturn(PETSC_SUCCESS);
157: }
159: /*
160: BV_IPMatMultBV - Multiply BV by the inner-product matrix, cache the
161: result internally in bv->cached.
162: */
163: static inline PetscErrorCode BV_IPMatMultBV(BV bv)
164: {
165: PetscFunctionBegin;
166: PetscCall(BVGetCachedBV(bv,&bv->cached));
167: if (((PetscObject)bv)->state != bv->bvstate || bv->l != bv->cached->l || bv->k != bv->cached->k) {
168: PetscCall(BVSetActiveColumns(bv->cached,bv->l,bv->k));
169: if (bv->matrix) PetscCall(BVMatMult(bv,bv->matrix,bv->cached));
170: else PetscCall(BVCopy(bv,bv->cached));
171: bv->bvstate = ((PetscObject)bv)->state;
172: }
173: PetscFunctionReturn(PETSC_SUCCESS);
174: }
176: /*
177: BV_AllocateCoeffs - Allocate orthogonalization coefficients if not done already.
178: */
179: static inline PetscErrorCode BV_AllocateCoeffs(BV bv)
180: {
181: PetscFunctionBegin;
182: if (!bv->h) PetscCall(PetscMalloc2(bv->nc+bv->m,&bv->h,bv->nc+bv->m,&bv->c));
183: PetscFunctionReturn(PETSC_SUCCESS);
184: }
186: /*
187: BV_AllocateSignature - Allocate signature coefficients if not done already.
188: */
189: static inline PetscErrorCode BV_AllocateSignature(BV bv)
190: {
191: PetscFunctionBegin;
192: if (bv->indef && !bv->omega) {
193: if (bv->cuda) {
194: #if defined(PETSC_HAVE_CUDA)
195: PetscCall(VecCreateSeqCUDA(PETSC_COMM_SELF,bv->nc+bv->m,&bv->omega));
196: #else
197: SETERRQ(PetscObjectComm((PetscObject)bv),PETSC_ERR_PLIB,"Something wrong happened");
198: #endif
199: } else if (bv->hip) {
200: #if defined(PETSC_HAVE_HIP)
201: PetscCall(VecCreateSeqHIP(PETSC_COMM_SELF,bv->nc+bv->m,&bv->omega));
202: #else
203: SETERRQ(PetscObjectComm((PetscObject)bv),PETSC_ERR_PLIB,"Something wrong happened");
204: #endif
205: } else PetscCall(VecCreateSeq(PETSC_COMM_SELF,bv->nc+bv->m,&bv->omega));
206: PetscCall(VecSet(bv->omega,1.0));
207: }
208: PetscFunctionReturn(PETSC_SUCCESS);
209: }
211: /*
212: BV_SetMatrixDiagonal - sets the inner product matrix for BV as a diagonal matrix
213: with the diagonal specified by vector vomega, using the same matrix type as matrix M
214: */
215: static inline PetscErrorCode BV_SetMatrixDiagonal(BV bv,Vec vomega,Mat M)
216: {
217: Mat Omega;
218: MatType Mtype;
220: PetscFunctionBegin;
221: PetscCall(MatGetType(M,&Mtype));
222: PetscCall(MatCreate(PetscObjectComm((PetscObject)bv),&Omega));
223: PetscCall(MatSetSizes(Omega,bv->n,bv->n,bv->N,bv->N));
224: PetscCall(MatSetType(Omega,Mtype));
225: PetscCall(MatDiagonalSet(Omega,vomega,INSERT_VALUES));
226: PetscCall(BVSetMatrix(bv,Omega,PETSC_TRUE));
227: PetscCall(MatDestroy(&Omega));
228: PetscFunctionReturn(PETSC_SUCCESS);
229: }
231: /*
232: BVAvailableVec: First (0) or second (1) vector available for
233: getcolumn operation (or -1 if both vectors already fetched).
234: */
235: #define BVAvailableVec (((bv->ci[0]==-bv->nc-1)? 0: (bv->ci[1]==-bv->nc-1)? 1: -1))
237: /*
238: Macros to test valid BV arguments
239: */
240: #if !defined(PETSC_USE_DEBUG)
242: #define BVCheckSizes(h,arg) do {(void)(h);} while (0)
243: #define BVCheckOp(h,arg,op) do {(void)(h);} while (0)
245: #else
247: #define BVCheckSizes(h,arg) \
248: do { \
249: PetscCheck((h)->m,PetscObjectComm((PetscObject)(h)),PETSC_ERR_ARG_WRONGSTATE,"BV sizes have not been defined: Parameter #%d",arg); \
250: } while (0)
252: #define BVCheckOp(h,arg,op) \
253: do { \
254: PetscCheck((h)->ops->op,PetscObjectComm((PetscObject)(h)),PETSC_ERR_SUP,"Operation not implemented in this BV type: Parameter #%d",arg); \
255: } while (0)
257: #endif
259: SLEPC_INTERN PetscErrorCode BVView_Vecs(BV,PetscViewer);
261: SLEPC_INTERN PetscErrorCode BVAllocateWork_Private(BV,PetscInt);
263: SLEPC_INTERN PetscErrorCode BVMult_BLAS_Private(BV,PetscInt,PetscInt,PetscInt,PetscScalar,const PetscScalar*,PetscInt,const PetscScalar*,PetscInt,PetscScalar,PetscScalar*,PetscInt);
264: SLEPC_INTERN PetscErrorCode BVMultVec_BLAS_Private(BV,PetscInt,PetscInt,PetscScalar,const PetscScalar*,PetscInt,const PetscScalar*,PetscScalar,PetscScalar*);
265: SLEPC_INTERN PetscErrorCode BVMultInPlace_BLAS_Private(BV,PetscInt,PetscInt,PetscInt,PetscInt,PetscScalar*,PetscInt,const PetscScalar*,PetscInt,PetscBool);
266: SLEPC_INTERN PetscErrorCode BVMultInPlace_Vecs_Private(BV,PetscInt,PetscInt,PetscInt,Vec*,const PetscScalar*,PetscBool);
267: SLEPC_INTERN PetscErrorCode BVAXPY_BLAS_Private(BV,PetscInt,PetscInt,PetscScalar,const PetscScalar*,PetscInt,PetscScalar,PetscScalar*,PetscInt);
268: SLEPC_INTERN PetscErrorCode BVDot_BLAS_Private(BV,PetscInt,PetscInt,PetscInt,const PetscScalar*,PetscInt,const PetscScalar*,PetscInt,PetscScalar*,PetscInt,PetscBool);
269: SLEPC_INTERN PetscErrorCode BVDotVec_BLAS_Private(BV,PetscInt,PetscInt,const PetscScalar*,PetscInt,const PetscScalar*,PetscScalar*,PetscBool);
270: SLEPC_INTERN PetscErrorCode BVScale_BLAS_Private(BV,PetscInt,PetscScalar*,PetscScalar);
271: SLEPC_INTERN PetscErrorCode BVNorm_LAPACK_Private(BV,PetscInt,PetscInt,const PetscScalar*,PetscInt,NormType,PetscReal*,PetscBool);
272: SLEPC_INTERN PetscErrorCode BVNormalize_LAPACK_Private(BV,PetscInt,PetscInt,const PetscScalar*,PetscInt,PetscScalar*,PetscBool);
273: SLEPC_INTERN PetscErrorCode BVGetMat_Default(BV,Mat*);
274: SLEPC_INTERN PetscErrorCode BVRestoreMat_Default(BV,Mat*);
275: SLEPC_INTERN PetscErrorCode BVMatCholInv_LAPACK_Private(BV,Mat,Mat);
276: SLEPC_INTERN PetscErrorCode BVMatTriInv_LAPACK_Private(BV,Mat,Mat);
277: SLEPC_INTERN PetscErrorCode BVMatSVQB_LAPACK_Private(BV,Mat,Mat);
278: SLEPC_INTERN PetscErrorCode BVOrthogonalize_LAPACK_TSQR(BV,PetscInt,PetscInt,PetscScalar*,PetscInt,PetscScalar*,PetscInt);
279: SLEPC_INTERN PetscErrorCode BVOrthogonalize_LAPACK_TSQR_OnlyR(BV,PetscInt,PetscInt,PetscScalar*,PetscInt,PetscScalar*,PetscInt);
281: /* reduction operations used in BVOrthogonalize and BVNormalize */
282: SLEPC_EXTERN MPI_Op MPIU_TSQR, MPIU_LAPY2;
283: SLEPC_EXTERN void MPIAPI SlepcGivensPacked(void*,void*,PetscMPIInt*,MPI_Datatype*);
284: SLEPC_EXTERN void MPIAPI SlepcPythag(void*,void*,PetscMPIInt*,MPI_Datatype*);
286: /*
287: BV_CleanCoefficients_Default - Sets to zero all entries of column j of the bv buffer
288: */
289: static inline PetscErrorCode BV_CleanCoefficients_Default(BV bv,PetscInt j,PetscScalar *h)
290: {
291: PetscScalar *hh=h,*a;
292: PetscInt i;
294: PetscFunctionBegin;
295: if (!h) {
296: PetscCall(VecGetArray(bv->buffer,&a));
297: hh = a + j*(bv->nc+bv->m);
298: }
299: for (i=0;i<bv->nc+j;i++) hh[i] = 0.0;
300: if (!h) PetscCall(VecRestoreArray(bv->buffer,&a));
301: PetscFunctionReturn(PETSC_SUCCESS);
302: }
304: /*
305: BV_AddCoefficients_Default - Add the contents of the scratch (0-th column) of the bv buffer
306: into column j of the bv buffer
307: */
308: static inline PetscErrorCode BV_AddCoefficients_Default(BV bv,PetscInt j,PetscScalar *h,PetscScalar *c)
309: {
310: PetscScalar *hh=h,*cc=c;
311: PetscInt i;
313: PetscFunctionBegin;
314: if (!h) {
315: PetscCall(VecGetArray(bv->buffer,&cc));
316: hh = cc + j*(bv->nc+bv->m);
317: }
318: for (i=0;i<bv->nc+j;i++) hh[i] += cc[i];
319: if (!h) PetscCall(VecRestoreArray(bv->buffer,&cc));
320: PetscCall(PetscLogFlops(1.0*(bv->nc+j)));
321: PetscFunctionReturn(PETSC_SUCCESS);
322: }
324: /*
325: BV_SetValue_Default - Sets value in row j (counted after the constraints) of column k
326: of the coefficients array
327: */
328: static inline PetscErrorCode BV_SetValue_Default(BV bv,PetscInt j,PetscInt k,PetscScalar *h,PetscScalar value)
329: {
330: PetscScalar *hh=h,*a;
332: PetscFunctionBegin;
333: if (!h) {
334: PetscCall(VecGetArray(bv->buffer,&a));
335: hh = a + k*(bv->nc+bv->m);
336: }
337: hh[bv->nc+j] = value;
338: if (!h) PetscCall(VecRestoreArray(bv->buffer,&a));
339: PetscFunctionReturn(PETSC_SUCCESS);
340: }
342: /*
343: BV_SquareSum_Default - Returns the value h'*h, where h represents the contents of the
344: coefficients array (up to position j)
345: */
346: static inline PetscErrorCode BV_SquareSum_Default(BV bv,PetscInt j,PetscScalar *h,PetscReal *sum)
347: {
348: PetscScalar *hh=h;
349: PetscInt i;
351: PetscFunctionBegin;
352: *sum = 0.0;
353: if (!h) PetscCall(VecGetArray(bv->buffer,&hh));
354: for (i=0;i<bv->nc+j;i++) *sum += PetscRealPart(hh[i]*PetscConj(hh[i]));
355: if (!h) PetscCall(VecRestoreArray(bv->buffer,&hh));
356: PetscCall(PetscLogFlops(2.0*(bv->nc+j)));
357: PetscFunctionReturn(PETSC_SUCCESS);
358: }
360: /*
361: BV_ApplySignature_Default - Computes the pointwise product h*omega, where h represents
362: the contents of the coefficients array (up to position j) and omega is the signature;
363: if inverse=TRUE then the operation is h/omega
364: */
365: static inline PetscErrorCode BV_ApplySignature_Default(BV bv,PetscInt j,PetscScalar *h,PetscBool inverse)
366: {
367: PetscScalar *hh=h;
368: PetscInt i;
369: const PetscScalar *omega;
371: PetscFunctionBegin;
372: if (PetscUnlikely(!(bv->nc+j))) PetscFunctionReturn(PETSC_SUCCESS);
373: if (!h) PetscCall(VecGetArray(bv->buffer,&hh));
374: PetscCall(VecGetArrayRead(bv->omega,&omega));
375: if (inverse) for (i=0;i<bv->nc+j;i++) hh[i] /= PetscRealPart(omega[i]);
376: else for (i=0;i<bv->nc+j;i++) hh[i] *= PetscRealPart(omega[i]);
377: PetscCall(VecRestoreArrayRead(bv->omega,&omega));
378: if (!h) PetscCall(VecRestoreArray(bv->buffer,&hh));
379: PetscCall(PetscLogFlops(1.0*(bv->nc+j)));
380: PetscFunctionReturn(PETSC_SUCCESS);
381: }
383: /*
384: BV_SquareRoot_Default - Returns the square root of position j (counted after the constraints)
385: of the coefficients array
386: */
387: static inline PetscErrorCode BV_SquareRoot_Default(BV bv,PetscInt j,PetscScalar *h,PetscReal *beta)
388: {
389: PetscScalar *hh=h;
391: PetscFunctionBegin;
392: if (!h) PetscCall(VecGetArray(bv->buffer,&hh));
393: PetscCall(BV_SafeSqrt(bv,hh[bv->nc+j],beta));
394: if (!h) PetscCall(VecRestoreArray(bv->buffer,&hh));
395: PetscFunctionReturn(PETSC_SUCCESS);
396: }
398: /*
399: BV_StoreCoefficients_Default - Copy the contents of the coefficients array to an array dest
400: provided by the caller (only values from l to j are copied)
401: */
402: static inline PetscErrorCode BV_StoreCoefficients_Default(BV bv,PetscInt j,PetscScalar *h,PetscScalar *dest)
403: {
404: PetscScalar *hh=h,*a;
405: PetscInt i;
407: PetscFunctionBegin;
408: if (!h) {
409: PetscCall(VecGetArray(bv->buffer,&a));
410: hh = a + j*(bv->nc+bv->m);
411: }
412: for (i=bv->l;i<j;i++) dest[i-bv->l] = hh[bv->nc+i];
413: if (!h) PetscCall(VecRestoreArray(bv->buffer,&a));
414: PetscFunctionReturn(PETSC_SUCCESS);
415: }
417: /*
418: BV_GetEigenvector - retrieves k-th eigenvector from basis vectors V.
419: The argument eigi is the imaginary part of the corresponding eigenvalue.
420: */
421: static inline PetscErrorCode BV_GetEigenvector(BV V,PetscInt k,PetscScalar eigi,Vec Vr,Vec Vi)
422: {
423: PetscFunctionBegin;
424: #if defined(PETSC_USE_COMPLEX)
425: (void)eigi;
426: if (Vr) PetscCall(BVCopyVec(V,k,Vr));
427: if (Vi) PetscCall(VecSet(Vi,0.0));
428: #else
429: if (eigi > 0.0) { /* first value of conjugate pair */
430: if (Vr) PetscCall(BVCopyVec(V,k,Vr));
431: if (Vi) PetscCall(BVCopyVec(V,k+1,Vi));
432: } else if (eigi < 0.0) { /* second value of conjugate pair */
433: if (Vr) PetscCall(BVCopyVec(V,k-1,Vr));
434: if (Vi) {
435: PetscCall(BVCopyVec(V,k,Vi));
436: PetscCall(VecScale(Vi,-1.0));
437: }
438: } else { /* real eigenvalue */
439: if (Vr) PetscCall(BVCopyVec(V,k,Vr));
440: if (Vi) PetscCall(VecSet(Vi,0.0));
441: }
442: #endif
443: PetscFunctionReturn(PETSC_SUCCESS);
444: }
446: /*
447: BV_OrthogonalizeColumn_Safe - this is intended for cases where we know that
448: the resulting vector is going to be numerically zero, so normalization or
449: iterative refinement may cause problems in parallel (collective operation
450: not being called by all processes)
451: */
452: static inline PetscErrorCode BV_OrthogonalizeColumn_Safe(BV bv,PetscInt j,PetscScalar *H,PetscReal *norm,PetscBool *lindep)
453: {
454: BVOrthogRefineType orthog_ref;
456: PetscFunctionBegin;
457: PetscCall(PetscInfo(bv,"Orthogonalizing column %" PetscInt_FMT " without refinement\n",j));
458: orthog_ref = bv->orthog_ref;
459: bv->orthog_ref = BV_ORTHOG_REFINE_NEVER; /* avoid refinement */
460: PetscCall(BVOrthogonalizeColumn(bv,j,H,NULL,NULL));
461: bv->orthog_ref = orthog_ref; /* restore refinement setting */
462: if (norm) *norm = 0.0;
463: if (lindep) *lindep = PETSC_TRUE;
464: PetscFunctionReturn(PETSC_SUCCESS);
465: }
467: /*
468: BV_SetDefaultLD - set the default value of the leading dimension, based on
469: the local size.
470: */
471: static inline PetscErrorCode BV_SetDefaultLD(BV bv,PetscInt nloc)
472: {
473: size_t bytes,align;
475: PetscFunctionBegin;
476: if (bv->ld) { /* set by user */
477: PetscCheck(bv->ld>=nloc,PetscObjectComm((PetscObject)bv),PETSC_ERR_USER_INPUT,"The leading dimension %" PetscInt_FMT " should be larger or equal to the local number of rows %" PetscInt_FMT,bv->ld,nloc);
478: } else {
479: align = PetscMax(PETSC_MEMALIGN,16); /* assume that CUDA requires 16-byte alignment */
480: bytes = (nloc*sizeof(PetscScalar) + align - 1) & ~(align - 1);
481: PetscCall(PetscIntCast(bytes/sizeof(PetscScalar),&bv->ld));
482: }
483: PetscFunctionReturn(PETSC_SUCCESS);
484: }
486: #if defined(PETSC_HAVE_CUDA)
487: /*
488: BV_MatDenseCUDAGetArrayRead - if Q is MATSEQDENSE it will allocate memory on the
489: GPU and copy the contents; otherwise, calls MatDenseCUDAGetArrayRead()
490: */
491: static inline PetscErrorCode BV_MatDenseCUDAGetArrayRead(BV bv,Mat Q,const PetscScalar **d_q)
492: {
493: const PetscScalar *q;
494: PetscInt ldq,mq;
495: PetscCuBLASInt ldq_=0;
496: PetscBool matiscuda;
498: PetscFunctionBegin;
499: (void)bv; // avoid unused parameter warning
500: PetscCall(MatGetSize(Q,NULL,&mq));
501: PetscCall(MatDenseGetLDA(Q,&ldq));
502: PetscCall(PetscCuBLASIntCast(ldq,&ldq_));
503: PetscCall(PetscObjectTypeCompare((PetscObject)Q,MATSEQDENSECUDA,&matiscuda));
504: if (matiscuda) PetscCall(MatDenseCUDAGetArrayRead(Q,d_q));
505: else {
506: PetscCall(MatDenseGetArrayRead(Q,&q));
507: PetscCallCUDA(cudaMalloc((void**)d_q,ldq*mq*sizeof(PetscScalar)));
508: PetscCallCUDA(cudaMemcpy((void*)*d_q,q,ldq*mq*sizeof(PetscScalar),cudaMemcpyHostToDevice));
509: PetscCall(PetscLogCpuToGpu(ldq*mq*sizeof(PetscScalar)));
510: }
511: PetscFunctionReturn(PETSC_SUCCESS);
512: }
514: /*
515: BV_MatDenseCUDARestoreArrayRead - restores the pointer obtained with BV_MatDenseCUDAGetArrayRead(),
516: freeing the GPU memory in case of MATSEQDENSE
517: */
518: static inline PetscErrorCode BV_MatDenseCUDARestoreArrayRead(BV bv,Mat Q,const PetscScalar **d_q)
519: {
520: PetscBool matiscuda;
522: PetscFunctionBegin;
523: (void)bv; // avoid unused parameter warning
524: PetscCall(PetscObjectTypeCompare((PetscObject)Q,MATSEQDENSECUDA,&matiscuda));
525: if (matiscuda) PetscCall(MatDenseCUDARestoreArrayRead(Q,d_q));
526: else {
527: PetscCall(MatDenseRestoreArrayRead(Q,NULL));
528: PetscCallCUDA(cudaFree((void*)*d_q));
529: *d_q = NULL;
530: }
531: PetscFunctionReturn(PETSC_SUCCESS);
532: }
534: SLEPC_INTERN PetscErrorCode BVMult_BLAS_CUDA(BV,PetscInt,PetscInt,PetscInt,PetscScalar,const PetscScalar*,PetscInt,const PetscScalar*,PetscInt,PetscScalar,PetscScalar*,PetscInt);
535: SLEPC_INTERN PetscErrorCode BVMultVec_BLAS_CUDA(BV,PetscInt,PetscInt,PetscScalar,const PetscScalar*,PetscInt,const PetscScalar*,PetscScalar,PetscScalar*);
536: SLEPC_INTERN PetscErrorCode BVMultInPlace_BLAS_CUDA(BV,PetscInt,PetscInt,PetscInt,PetscInt,PetscScalar*,PetscInt,const PetscScalar*,PetscInt,PetscBool);
537: SLEPC_INTERN PetscErrorCode BVAXPY_BLAS_CUDA(BV,PetscInt,PetscInt,PetscScalar,const PetscScalar*,PetscInt,PetscScalar,PetscScalar*,PetscInt);
538: SLEPC_INTERN PetscErrorCode BVDot_BLAS_CUDA(BV,PetscInt,PetscInt,PetscInt,const PetscScalar*,PetscInt,const PetscScalar*,PetscInt,PetscScalar*,PetscInt,PetscBool);
539: SLEPC_INTERN PetscErrorCode BVDotVec_BLAS_CUDA(BV,PetscInt,PetscInt,const PetscScalar*,PetscInt,const PetscScalar*,PetscScalar*,PetscBool);
540: SLEPC_INTERN PetscErrorCode BVScale_BLAS_CUDA(BV,PetscInt,PetscScalar*,PetscScalar);
541: SLEPC_INTERN PetscErrorCode BVNorm_BLAS_CUDA(BV,PetscInt,const PetscScalar*,PetscReal*);
542: SLEPC_INTERN PetscErrorCode BVNormalize_BLAS_CUDA(BV,PetscInt,PetscInt,PetscScalar*,PetscInt,PetscScalar*);
544: SLEPC_INTERN PetscErrorCode BV_CleanCoefficients_CUDA(BV,PetscInt,PetscScalar*);
545: SLEPC_INTERN PetscErrorCode BV_AddCoefficients_CUDA(BV,PetscInt,PetscScalar*,PetscScalar*);
546: SLEPC_INTERN PetscErrorCode BV_SetValue_CUDA(BV,PetscInt,PetscInt,PetscScalar*,PetscScalar);
547: SLEPC_INTERN PetscErrorCode BV_SquareSum_CUDA(BV,PetscInt,PetscScalar*,PetscReal*);
548: SLEPC_INTERN PetscErrorCode BV_ApplySignature_CUDA(BV,PetscInt,PetscScalar*,PetscBool);
549: SLEPC_INTERN PetscErrorCode BV_SquareRoot_CUDA(BV,PetscInt,PetscScalar*,PetscReal*);
550: SLEPC_INTERN PetscErrorCode BV_StoreCoefficients_CUDA(BV,PetscInt,PetscScalar*,PetscScalar*);
551: #define BV_CleanCoefficients(a,b,c) ((a)->cuda?BV_CleanCoefficients_CUDA:BV_CleanCoefficients_Default)((a),(b),(c))
552: #define BV_AddCoefficients(a,b,c,d) ((a)->cuda?BV_AddCoefficients_CUDA:BV_AddCoefficients_Default)((a),(b),(c),(d))
553: #define BV_SetValue(a,b,c,d,e) ((a)->cuda?BV_SetValue_CUDA:BV_SetValue_Default)((a),(b),(c),(d),(e))
554: #define BV_SquareSum(a,b,c,d) ((a)->cuda?BV_SquareSum_CUDA:BV_SquareSum_Default)((a),(b),(c),(d))
555: #define BV_ApplySignature(a,b,c,d) ((a)->cuda?BV_ApplySignature_CUDA:BV_ApplySignature_Default)((a),(b),(c),(d))
556: #define BV_SquareRoot(a,b,c,d) ((a)->cuda?BV_SquareRoot_CUDA:BV_SquareRoot_Default)((a),(b),(c),(d))
557: #define BV_StoreCoefficients(a,b,c,d) ((a)->cuda?BV_StoreCoefficients_CUDA:BV_StoreCoefficients_Default)((a),(b),(c),(d))
559: #elif defined(PETSC_HAVE_HIP)
560: #include <petscdevice_cupm.h>
561: /*
562: BV_MatDenseHIPGetArrayRead - if Q is MATSEQDENSE it will allocate memory on the
563: GPU and copy the contents; otherwise, calls MatDenseHIPGetArrayRead()
564: */
565: static inline PetscErrorCode BV_MatDenseHIPGetArrayRead(BV bv,Mat Q,const PetscScalar **d_q)
566: {
567: const PetscScalar *q;
568: PetscInt ldq,mq;
569: PetscCuBLASInt ldq_=0;
570: PetscBool matiship;
572: PetscFunctionBegin;
573: (void)bv; // avoid unused parameter warning
574: PetscCall(MatGetSize(Q,NULL,&mq));
575: PetscCall(MatDenseGetLDA(Q,&ldq));
576: PetscCall(PetscHipBLASIntCast(ldq,&ldq_));
577: PetscCall(PetscObjectTypeCompare((PetscObject)Q,MATSEQDENSEHIP,&matiship));
578: if (matiship) PetscCall(MatDenseHIPGetArrayRead(Q,d_q));
579: else {
580: PetscCall(MatDenseGetArrayRead(Q,&q));
581: PetscCallHIP(hipMalloc((void**)d_q,ldq*mq*sizeof(PetscScalar)));
582: PetscCallHIP(hipMemcpy((void*)*d_q,q,ldq*mq*sizeof(PetscScalar),hipMemcpyHostToDevice));
583: PetscCall(PetscLogCpuToGpu(ldq*mq*sizeof(PetscScalar)));
584: }
585: PetscFunctionReturn(PETSC_SUCCESS);
586: }
588: /*
589: BV_MatDenseHIPRestoreArrayRead - restores the pointer obtained with BV_MatDenseHIPGetArrayRead(),
590: freeing the GPU memory in case of MATSEQDENSE
591: */
592: static inline PetscErrorCode BV_MatDenseHIPRestoreArrayRead(BV bv,Mat Q,const PetscScalar **d_q)
593: {
594: PetscBool matiship;
596: PetscFunctionBegin;
597: (void)bv; // avoid unused parameter warning
598: PetscCall(PetscObjectTypeCompare((PetscObject)Q,MATSEQDENSEHIP,&matiship));
599: if (matiship) PetscCall(MatDenseHIPRestoreArrayRead(Q,d_q));
600: else {
601: PetscCall(MatDenseRestoreArrayRead(Q,NULL));
602: PetscCallHIP(hipFree((void*)*d_q));
603: *d_q = NULL;
604: }
605: PetscFunctionReturn(PETSC_SUCCESS);
606: }
608: SLEPC_INTERN PetscErrorCode BVMult_BLAS_HIP(BV,PetscInt,PetscInt,PetscInt,PetscScalar,const PetscScalar*,PetscInt,const PetscScalar*,PetscInt,PetscScalar,PetscScalar*,PetscInt);
609: SLEPC_INTERN PetscErrorCode BVMultVec_BLAS_HIP(BV,PetscInt,PetscInt,PetscScalar,const PetscScalar*,PetscInt,const PetscScalar*,PetscScalar,PetscScalar*);
610: SLEPC_INTERN PetscErrorCode BVMultInPlace_BLAS_HIP(BV,PetscInt,PetscInt,PetscInt,PetscInt,PetscScalar*,PetscInt,const PetscScalar*,PetscInt,PetscBool);
611: SLEPC_INTERN PetscErrorCode BVAXPY_BLAS_HIP(BV,PetscInt,PetscInt,PetscScalar,const PetscScalar*,PetscInt,PetscScalar,PetscScalar*,PetscInt);
612: SLEPC_INTERN PetscErrorCode BVDot_BLAS_HIP(BV,PetscInt,PetscInt,PetscInt,const PetscScalar*,PetscInt,const PetscScalar*,PetscInt,PetscScalar*,PetscInt,PetscBool);
613: SLEPC_INTERN PetscErrorCode BVDotVec_BLAS_HIP(BV,PetscInt,PetscInt,const PetscScalar*,PetscInt,const PetscScalar*,PetscScalar*,PetscBool);
614: SLEPC_INTERN PetscErrorCode BVScale_BLAS_HIP(BV,PetscInt,PetscScalar*,PetscScalar);
615: SLEPC_INTERN PetscErrorCode BVNorm_BLAS_HIP(BV,PetscInt,const PetscScalar*,PetscReal*);
616: SLEPC_INTERN PetscErrorCode BVNormalize_BLAS_HIP(BV,PetscInt,PetscInt,PetscScalar*,PetscInt,PetscScalar*);
618: SLEPC_INTERN PetscErrorCode BV_CleanCoefficients_HIP(BV,PetscInt,PetscScalar*);
619: SLEPC_INTERN PetscErrorCode BV_AddCoefficients_HIP(BV,PetscInt,PetscScalar*,PetscScalar*);
620: SLEPC_INTERN PetscErrorCode BV_SetValue_HIP(BV,PetscInt,PetscInt,PetscScalar*,PetscScalar);
621: SLEPC_INTERN PetscErrorCode BV_SquareSum_HIP(BV,PetscInt,PetscScalar*,PetscReal*);
622: SLEPC_INTERN PetscErrorCode BV_ApplySignature_HIP(BV,PetscInt,PetscScalar*,PetscBool);
623: SLEPC_INTERN PetscErrorCode BV_SquareRoot_HIP(BV,PetscInt,PetscScalar*,PetscReal*);
624: SLEPC_INTERN PetscErrorCode BV_StoreCoefficients_HIP(BV,PetscInt,PetscScalar*,PetscScalar*);
625: #define BV_CleanCoefficients(a,b,c) ((a)->hip?BV_CleanCoefficients_HIP:BV_CleanCoefficients_Default)((a),(b),(c))
626: #define BV_AddCoefficients(a,b,c,d) ((a)->hip?BV_AddCoefficients_HIP:BV_AddCoefficients_Default)((a),(b),(c),(d))
627: #define BV_SetValue(a,b,c,d,e) ((a)->hip?BV_SetValue_HIP:BV_SetValue_Default)((a),(b),(c),(d),(e))
628: #define BV_SquareSum(a,b,c,d) ((a)->hip?BV_SquareSum_HIP:BV_SquareSum_Default)((a),(b),(c),(d))
629: #define BV_ApplySignature(a,b,c,d) ((a)->hip?BV_ApplySignature_HIP:BV_ApplySignature_Default)((a),(b),(c),(d))
630: #define BV_SquareRoot(a,b,c,d) ((a)->hip?BV_SquareRoot_HIP:BV_SquareRoot_Default)((a),(b),(c),(d))
631: #define BV_StoreCoefficients(a,b,c,d) ((a)->hip?BV_StoreCoefficients_HIP:BV_StoreCoefficients_Default)((a),(b),(c),(d))
633: #else /* CPU */
634: #define BV_CleanCoefficients(a,b,c) BV_CleanCoefficients_Default((a),(b),(c))
635: #define BV_AddCoefficients(a,b,c,d) BV_AddCoefficients_Default((a),(b),(c),(d))
636: #define BV_SetValue(a,b,c,d,e) BV_SetValue_Default((a),(b),(c),(d),(e))
637: #define BV_SquareSum(a,b,c,d) BV_SquareSum_Default((a),(b),(c),(d))
638: #define BV_ApplySignature(a,b,c,d) BV_ApplySignature_Default((a),(b),(c),(d))
639: #define BV_SquareRoot(a,b,c,d) BV_SquareRoot_Default((a),(b),(c),(d))
640: #define BV_StoreCoefficients(a,b,c,d) BV_StoreCoefficients_Default((a),(b),(c),(d))
641: #endif /* PETSC_HAVE_CUDA */