Line data Source code
1 : /*
2 : - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3 : SLEPc - Scalable Library for Eigenvalue Problem Computations
4 : Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
5 :
6 : This file is part of SLEPc.
7 : SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8 : - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9 : */
10 :
11 : #pragma once
12 :
13 : #include <slepcbv.h>
14 : #include <slepc/private/slepcimpl.h>
15 :
16 : /* SUBMANSEC = BV */
17 :
18 : SLEPC_EXTERN PetscBool BVRegisterAllCalled;
19 : SLEPC_EXTERN PetscErrorCode BVRegisterAll(void);
20 :
21 : SLEPC_EXTERN PetscLogEvent BV_Create,BV_Copy,BV_Mult,BV_MultVec,BV_MultInPlace,BV_Dot,BV_DotVec,BV_Orthogonalize,BV_OrthogonalizeVec,BV_Scale,BV_Norm,BV_NormVec,BV_Normalize,BV_SetRandom,BV_MatMult,BV_MatMultVec,BV_MatProject,BV_SVDAndRank;
22 :
23 : typedef struct _BVOps *BVOps;
24 :
25 : struct _BVOps {
26 : PetscErrorCode (*mult)(BV,PetscScalar,PetscScalar,BV,Mat);
27 : PetscErrorCode (*multvec)(BV,PetscScalar,PetscScalar,Vec,PetscScalar*);
28 : PetscErrorCode (*multinplace)(BV,Mat,PetscInt,PetscInt);
29 : PetscErrorCode (*multinplacetrans)(BV,Mat,PetscInt,PetscInt);
30 : PetscErrorCode (*dot)(BV,BV,Mat);
31 : PetscErrorCode (*dotvec)(BV,Vec,PetscScalar*);
32 : PetscErrorCode (*dotvec_local)(BV,Vec,PetscScalar*);
33 : PetscErrorCode (*dotvec_begin)(BV,Vec,PetscScalar*);
34 : PetscErrorCode (*dotvec_end)(BV,Vec,PetscScalar*);
35 : PetscErrorCode (*scale)(BV,PetscInt,PetscScalar);
36 : PetscErrorCode (*norm)(BV,PetscInt,NormType,PetscReal*);
37 : PetscErrorCode (*norm_local)(BV,PetscInt,NormType,PetscReal*);
38 : PetscErrorCode (*norm_begin)(BV,PetscInt,NormType,PetscReal*);
39 : PetscErrorCode (*norm_end)(BV,PetscInt,NormType,PetscReal*);
40 : PetscErrorCode (*normalize)(BV,PetscScalar*);
41 : PetscErrorCode (*matmult)(BV,Mat,BV);
42 : PetscErrorCode (*copy)(BV,BV);
43 : PetscErrorCode (*copycolumn)(BV,PetscInt,PetscInt);
44 : PetscErrorCode (*resize)(BV,PetscInt,PetscBool);
45 : PetscErrorCode (*getcolumn)(BV,PetscInt,Vec*);
46 : PetscErrorCode (*restorecolumn)(BV,PetscInt,Vec*);
47 : PetscErrorCode (*getarray)(BV,PetscScalar**);
48 : PetscErrorCode (*restorearray)(BV,PetscScalar**);
49 : PetscErrorCode (*getarrayread)(BV,const PetscScalar**);
50 : PetscErrorCode (*restorearrayread)(BV,const PetscScalar**);
51 : PetscErrorCode (*restoresplit)(BV,BV*,BV*);
52 : PetscErrorCode (*restoresplitrows)(BV,IS,IS,BV*,BV*);
53 : PetscErrorCode (*gramschmidt)(BV,PetscInt,Vec,PetscBool*,PetscScalar*,PetscScalar*,PetscReal*,PetscReal*);
54 : PetscErrorCode (*getmat)(BV,Mat*);
55 : PetscErrorCode (*restoremat)(BV,Mat*);
56 : PetscErrorCode (*duplicate)(BV,BV);
57 : PetscErrorCode (*create)(BV);
58 : PetscErrorCode (*setfromoptions)(BV,PetscOptionItems*);
59 : PetscErrorCode (*view)(BV,PetscViewer);
60 : PetscErrorCode (*destroy)(BV);
61 : };
62 :
63 : struct _p_BV {
64 : PETSCHEADER(struct _BVOps);
65 : /*------------------------- User parameters --------------------------*/
66 : PetscLayout map; /* layout of columns */
67 : VecType vtype; /* vector type */
68 : PetscInt n,N; /* dimensions of vectors (local, global) */
69 : PetscInt m; /* number of vectors */
70 : PetscInt l; /* number of leading columns */
71 : PetscInt k; /* number of active columns */
72 : PetscInt nc; /* number of constraints */
73 : PetscInt ld; /* leading dimension */
74 : BVOrthogType orthog_type; /* the method of vector orthogonalization */
75 : BVOrthogRefineType orthog_ref; /* refinement method */
76 : PetscReal orthog_eta; /* refinement threshold */
77 : BVOrthogBlockType orthog_block; /* the method of block orthogonalization */
78 : Mat matrix; /* inner product matrix */
79 : PetscBool indef; /* matrix is indefinite */
80 : BVMatMultType vmm; /* version of matmult operation */
81 : PetscBool rrandom; /* reproducible random vectors */
82 : PetscReal deftol; /* tolerance for BV_SafeSqrt */
83 :
84 : /*---------------------- Cached data and workspace -------------------*/
85 : Vec buffer; /* buffer vector used in orthogonalization */
86 : Mat Abuffer; /* auxiliary seqdense matrix that wraps the buffer */
87 : Vec Bx; /* result of matrix times a vector x */
88 : PetscObjectId xid; /* object id of vector x */
89 : PetscObjectState xstate; /* state of vector x */
90 : Vec cv[2]; /* column vectors obtained with BVGetColumn() */
91 : PetscInt ci[2]; /* column indices of obtained vectors */
92 : PetscObjectState st[2]; /* state of obtained vectors */
93 : PetscObjectId id[2]; /* object id of obtained vectors */
94 : PetscScalar *h,*c; /* orthogonalization coefficients */
95 : Vec omega; /* signature matrix values for indefinite case */
96 : PetscBool defersfo; /* deferred call to setfromoptions */
97 : BV cached; /* cached BV to store result of matrix times BV */
98 : PetscObjectState bvstate; /* state of BV when BVApplyMatrixBV() was called */
99 : BV L,R; /* BV objects obtained with BVGetSplit/Rows() */
100 : PetscObjectState lstate,rstate;/* state of L and R when BVGetSplit/Rows() was called */
101 : PetscInt lsplit; /* value of l when BVGetSplit() was called (-1 if BVGetSplitRows()) */
102 : PetscInt issplit; /* !=0 if BV is from split (1=left, 2=right, -1=top, -2=bottom) */
103 : BV splitparent; /* my parent if I am a split BV */
104 : PetscRandom rand; /* random number generator */
105 : Mat Acreate; /* matrix given at BVCreateFromMat() */
106 : Mat Aget; /* matrix returned for BVGetMat() */
107 : PetscBool cuda; /* true if GPU must be used */
108 : PetscBool sfocalled; /* setfromoptions has been called */
109 : PetscScalar *work;
110 : PetscInt lwork;
111 : void *data;
112 : };
113 :
114 : /*
115 : BV_SafeSqrt - Computes the square root of a scalar value alpha, which is
116 : assumed to be z'*B*z. The result is
117 : if definite inner product: res = sqrt(alpha)
118 : if indefinite inner product: res = sgn(alpha)*sqrt(abs(alpha))
119 : */
120 370407 : static inline PetscErrorCode BV_SafeSqrt(BV bv,PetscScalar alpha,PetscReal *res)
121 : {
122 370407 : PetscReal absal,realp;
123 :
124 370407 : PetscFunctionBegin;
125 370407 : absal = PetscAbsScalar(alpha);
126 370407 : realp = PetscRealPart(alpha);
127 370407 : if (PetscUnlikely(absal<PETSC_MACHINE_EPSILON)) PetscCall(PetscInfo(bv,"Zero norm %g, either the vector is zero or a semi-inner product is being used\n",(double)absal));
128 : #if defined(PETSC_USE_COMPLEX)
129 : PetscCheck(PetscAbsReal(PetscImaginaryPart(alpha))<bv->deftol || PetscAbsReal(PetscImaginaryPart(alpha))/absal<10*bv->deftol,PetscObjectComm((PetscObject)bv),PETSC_ERR_USER_INPUT,"The inner product is not well defined: nonzero imaginary part %g",(double)PetscImaginaryPart(alpha));
130 : #endif
131 370407 : if (PetscUnlikely(bv->indef)) {
132 105802 : *res = (realp<0.0)? -PetscSqrtReal(-realp): PetscSqrtReal(realp);
133 : } else {
134 264605 : PetscCheck(realp>-bv->deftol,PetscObjectComm((PetscObject)bv),PETSC_ERR_USER_INPUT,"The inner product is not well defined: indefinite matrix %g",(double)realp);
135 264605 : *res = (realp<0.0)? 0.0: PetscSqrtReal(realp);
136 : }
137 370407 : PetscFunctionReturn(PETSC_SUCCESS);
138 : }
139 :
140 : /*
141 : BV_IPMatMult - Multiply a vector x by the inner-product matrix, cache the
142 : result in Bx.
143 : */
144 197651 : static inline PetscErrorCode BV_IPMatMult(BV bv,Vec x)
145 : {
146 197651 : PetscFunctionBegin;
147 197651 : if (((PetscObject)x)->id != bv->xid || ((PetscObject)x)->state != bv->xstate) {
148 191405 : if (PetscUnlikely(!bv->Bx)) PetscCall(MatCreateVecs(bv->matrix,&bv->Bx,NULL));
149 191405 : PetscCall(MatMult(bv->matrix,x,bv->Bx));
150 191405 : PetscCall(PetscObjectGetId((PetscObject)x,&bv->xid));
151 191405 : PetscCall(PetscObjectStateGet((PetscObject)x,&bv->xstate));
152 : }
153 197651 : PetscFunctionReturn(PETSC_SUCCESS);
154 : }
155 :
156 : /*
157 : BV_IPMatMultBV - Multiply BV by the inner-product matrix, cache the
158 : result internally in bv->cached.
159 : */
160 302 : static inline PetscErrorCode BV_IPMatMultBV(BV bv)
161 : {
162 302 : PetscFunctionBegin;
163 302 : PetscCall(BVGetCachedBV(bv,&bv->cached));
164 302 : if (((PetscObject)bv)->state != bv->bvstate || bv->l != bv->cached->l || bv->k != bv->cached->k) {
165 302 : PetscCall(BVSetActiveColumns(bv->cached,bv->l,bv->k));
166 302 : if (bv->matrix) PetscCall(BVMatMult(bv,bv->matrix,bv->cached));
167 0 : else PetscCall(BVCopy(bv,bv->cached));
168 302 : bv->bvstate = ((PetscObject)bv)->state;
169 : }
170 302 : PetscFunctionReturn(PETSC_SUCCESS);
171 : }
172 :
173 : /*
174 : BV_AllocateCoeffs - Allocate orthogonalization coefficients if not done already.
175 : */
176 47062 : static inline PetscErrorCode BV_AllocateCoeffs(BV bv)
177 : {
178 47062 : PetscFunctionBegin;
179 47062 : if (!bv->h) PetscCall(PetscMalloc2(bv->nc+bv->m,&bv->h,bv->nc+bv->m,&bv->c));
180 47062 : PetscFunctionReturn(PETSC_SUCCESS);
181 : }
182 :
183 : /*
184 : BV_AllocateSignature - Allocate signature coefficients if not done already.
185 : */
186 283723 : static inline PetscErrorCode BV_AllocateSignature(BV bv)
187 : {
188 283723 : PetscFunctionBegin;
189 283723 : if (bv->indef && !bv->omega) {
190 90 : if (bv->cuda) {
191 : #if defined(PETSC_HAVE_CUDA)
192 : PetscCall(VecCreateSeqCUDA(PETSC_COMM_SELF,bv->nc+bv->m,&bv->omega));
193 : #else
194 0 : SETERRQ(PetscObjectComm((PetscObject)bv),PETSC_ERR_PLIB,"Something wrong happened");
195 : #endif
196 90 : } else PetscCall(VecCreateSeq(PETSC_COMM_SELF,bv->nc+bv->m,&bv->omega));
197 90 : PetscCall(VecSet(bv->omega,1.0));
198 : }
199 283723 : PetscFunctionReturn(PETSC_SUCCESS);
200 : }
201 :
202 : /*
203 : BV_SetMatrixDiagonal - sets the inner product matrix for BV as a diagonal matrix
204 : with the diagonal specified by vector vomega, using the same matrix type as matrix M
205 : */
206 30 : static inline PetscErrorCode BV_SetMatrixDiagonal(BV bv,Vec vomega,Mat M)
207 : {
208 30 : Mat Omega;
209 30 : MatType Mtype;
210 :
211 30 : PetscFunctionBegin;
212 30 : PetscCall(MatGetType(M,&Mtype));
213 30 : PetscCall(MatCreate(PetscObjectComm((PetscObject)bv),&Omega));
214 30 : PetscCall(MatSetSizes(Omega,bv->n,bv->n,bv->N,bv->N));
215 30 : PetscCall(MatSetType(Omega,Mtype));
216 30 : PetscCall(MatDiagonalSet(Omega,vomega,INSERT_VALUES));
217 30 : PetscCall(BVSetMatrix(bv,Omega,PETSC_TRUE));
218 30 : PetscCall(MatDestroy(&Omega));
219 30 : PetscFunctionReturn(PETSC_SUCCESS);
220 : }
221 :
222 : /*
223 : BVAvailableVec: First (0) or second (1) vector available for
224 : getcolumn operation (or -1 if both vectors already fetched).
225 : */
226 : #define BVAvailableVec (((bv->ci[0]==-bv->nc-1)? 0: (bv->ci[1]==-bv->nc-1)? 1: -1))
227 :
228 : /*
229 : Macros to test valid BV arguments
230 : */
231 : #if !defined(PETSC_USE_DEBUG)
232 :
233 : #define BVCheckSizes(h,arg) do {(void)(h);} while (0)
234 : #define BVCheckOp(h,arg,op) do {(void)(h);} while (0)
235 :
236 : #else
237 :
238 : #define BVCheckSizes(h,arg) \
239 : do { \
240 : PetscCheck((h)->m,PetscObjectComm((PetscObject)(h)),PETSC_ERR_ARG_WRONGSTATE,"BV sizes have not been defined: Parameter #%d",arg); \
241 : } while (0)
242 :
243 : #define BVCheckOp(h,arg,op) \
244 : do { \
245 : PetscCheck((h)->ops->op,PetscObjectComm((PetscObject)(h)),PETSC_ERR_SUP,"Operation not implemented in this BV type: Parameter #%d",arg); \
246 : } while (0)
247 :
248 : #endif
249 :
250 : SLEPC_INTERN PetscErrorCode BVView_Vecs(BV,PetscViewer);
251 :
252 : SLEPC_INTERN PetscErrorCode BVAllocateWork_Private(BV,PetscInt);
253 :
254 : SLEPC_INTERN PetscErrorCode BVMult_BLAS_Private(BV,PetscInt,PetscInt,PetscInt,PetscScalar,const PetscScalar*,PetscInt,const PetscScalar*,PetscInt,PetscScalar,PetscScalar*,PetscInt);
255 : SLEPC_INTERN PetscErrorCode BVMultVec_BLAS_Private(BV,PetscInt,PetscInt,PetscScalar,const PetscScalar*,PetscInt,const PetscScalar*,PetscScalar,PetscScalar*);
256 : SLEPC_INTERN PetscErrorCode BVMultInPlace_BLAS_Private(BV,PetscInt,PetscInt,PetscInt,PetscInt,PetscScalar*,PetscInt,const PetscScalar*,PetscInt,PetscBool);
257 : SLEPC_INTERN PetscErrorCode BVMultInPlace_Vecs_Private(BV,PetscInt,PetscInt,PetscInt,Vec*,const PetscScalar*,PetscBool);
258 : SLEPC_INTERN PetscErrorCode BVAXPY_BLAS_Private(BV,PetscInt,PetscInt,PetscScalar,const PetscScalar*,PetscInt,PetscScalar,PetscScalar*,PetscInt);
259 : SLEPC_INTERN PetscErrorCode BVDot_BLAS_Private(BV,PetscInt,PetscInt,PetscInt,const PetscScalar*,PetscInt,const PetscScalar*,PetscInt,PetscScalar*,PetscInt,PetscBool);
260 : SLEPC_INTERN PetscErrorCode BVDotVec_BLAS_Private(BV,PetscInt,PetscInt,const PetscScalar*,PetscInt,const PetscScalar*,PetscScalar*,PetscBool);
261 : SLEPC_INTERN PetscErrorCode BVScale_BLAS_Private(BV,PetscInt,PetscScalar*,PetscScalar);
262 : SLEPC_INTERN PetscErrorCode BVNorm_LAPACK_Private(BV,PetscInt,PetscInt,const PetscScalar*,PetscInt,NormType,PetscReal*,PetscBool);
263 : SLEPC_INTERN PetscErrorCode BVNormalize_LAPACK_Private(BV,PetscInt,PetscInt,const PetscScalar*,PetscInt,PetscScalar*,PetscBool);
264 : SLEPC_INTERN PetscErrorCode BVGetMat_Default(BV,Mat*);
265 : SLEPC_INTERN PetscErrorCode BVRestoreMat_Default(BV,Mat*);
266 : SLEPC_INTERN PetscErrorCode BVMatCholInv_LAPACK_Private(BV,Mat,Mat);
267 : SLEPC_INTERN PetscErrorCode BVMatTriInv_LAPACK_Private(BV,Mat,Mat);
268 : SLEPC_INTERN PetscErrorCode BVMatSVQB_LAPACK_Private(BV,Mat,Mat);
269 : SLEPC_INTERN PetscErrorCode BVOrthogonalize_LAPACK_TSQR(BV,PetscInt,PetscInt,PetscScalar*,PetscInt,PetscScalar*,PetscInt);
270 : SLEPC_INTERN PetscErrorCode BVOrthogonalize_LAPACK_TSQR_OnlyR(BV,PetscInt,PetscInt,PetscScalar*,PetscInt,PetscScalar*,PetscInt);
271 :
272 : /* reduction operations used in BVOrthogonalize and BVNormalize */
273 : SLEPC_EXTERN MPI_Op MPIU_TSQR, MPIU_LAPY2;
274 : SLEPC_EXTERN void MPIAPI SlepcGivensPacked(void*,void*,PetscMPIInt*,MPI_Datatype*);
275 : SLEPC_EXTERN void MPIAPI SlepcPythag(void*,void*,PetscMPIInt*,MPI_Datatype*);
276 :
277 : /*
278 : BV_CleanCoefficients_Default - Sets to zero all entries of column j of the bv buffer
279 : */
280 281017 : static inline PetscErrorCode BV_CleanCoefficients_Default(BV bv,PetscInt j,PetscScalar *h)
281 : {
282 281017 : PetscScalar *hh=h,*a;
283 281017 : PetscInt i;
284 :
285 281017 : PetscFunctionBegin;
286 281017 : if (!h) {
287 233967 : PetscCall(VecGetArray(bv->buffer,&a));
288 233967 : hh = a + j*(bv->nc+bv->m);
289 : }
290 4146724 : for (i=0;i<bv->nc+j;i++) hh[i] = 0.0;
291 281017 : if (!h) PetscCall(VecRestoreArray(bv->buffer,&a));
292 281017 : PetscFunctionReturn(PETSC_SUCCESS);
293 : }
294 :
295 : /*
296 : BV_AddCoefficients_Default - Add the contents of the scratch (0-th column) of the bv buffer
297 : into column j of the bv buffer
298 : */
299 447995 : static inline PetscErrorCode BV_AddCoefficients_Default(BV bv,PetscInt j,PetscScalar *h,PetscScalar *c)
300 : {
301 447995 : PetscScalar *hh=h,*cc=c;
302 447995 : PetscInt i;
303 :
304 447995 : PetscFunctionBegin;
305 447995 : if (!h) {
306 395997 : PetscCall(VecGetArray(bv->buffer,&cc));
307 395997 : hh = cc + j*(bv->nc+bv->m);
308 : }
309 7401604 : for (i=0;i<bv->nc+j;i++) hh[i] += cc[i];
310 447995 : if (!h) PetscCall(VecRestoreArray(bv->buffer,&cc));
311 447995 : PetscCall(PetscLogFlops(1.0*(bv->nc+j)));
312 447995 : PetscFunctionReturn(PETSC_SUCCESS);
313 : }
314 :
315 : /*
316 : BV_SetValue_Default - Sets value in row j (counted after the constraints) of column k
317 : of the coefficients array
318 : */
319 321369 : static inline PetscErrorCode BV_SetValue_Default(BV bv,PetscInt j,PetscInt k,PetscScalar *h,PetscScalar value)
320 : {
321 321369 : PetscScalar *hh=h,*a;
322 :
323 321369 : PetscFunctionBegin;
324 321369 : if (!h) {
325 319171 : PetscCall(VecGetArray(bv->buffer,&a));
326 319171 : hh = a + k*(bv->nc+bv->m);
327 : }
328 321369 : hh[bv->nc+j] = value;
329 321369 : if (!h) PetscCall(VecRestoreArray(bv->buffer,&a));
330 321369 : PetscFunctionReturn(PETSC_SUCCESS);
331 : }
332 :
333 : /*
334 : BV_SquareSum_Default - Returns the value h'*h, where h represents the contents of the
335 : coefficients array (up to position j)
336 : */
337 305592 : static inline PetscErrorCode BV_SquareSum_Default(BV bv,PetscInt j,PetscScalar *h,PetscReal *sum)
338 : {
339 305592 : PetscScalar *hh=h;
340 305592 : PetscInt i;
341 :
342 305592 : PetscFunctionBegin;
343 305592 : *sum = 0.0;
344 305592 : if (!h) PetscCall(VecGetArray(bv->buffer,&hh));
345 3978009 : for (i=0;i<bv->nc+j;i++) *sum += PetscRealPart(hh[i]*PetscConj(hh[i]));
346 305592 : if (!h) PetscCall(VecRestoreArray(bv->buffer,&hh));
347 305592 : PetscCall(PetscLogFlops(2.0*(bv->nc+j)));
348 305592 : PetscFunctionReturn(PETSC_SUCCESS);
349 : }
350 :
351 : /*
352 : BV_ApplySignature_Default - Computes the pointwise product h*omega, where h represents
353 : the contents of the coefficients array (up to position j) and omega is the signature;
354 : if inverse=TRUE then the operation is h/omega
355 : */
356 205138 : static inline PetscErrorCode BV_ApplySignature_Default(BV bv,PetscInt j,PetscScalar *h,PetscBool inverse)
357 : {
358 205138 : PetscScalar *hh=h;
359 205138 : PetscInt i;
360 205138 : const PetscScalar *omega;
361 :
362 205138 : PetscFunctionBegin;
363 205138 : if (PetscUnlikely(!(bv->nc+j))) PetscFunctionReturn(PETSC_SUCCESS);
364 204974 : if (!h) PetscCall(VecGetArray(bv->buffer,&hh));
365 204974 : PetscCall(VecGetArrayRead(bv->omega,&omega));
366 3000230 : if (inverse) for (i=0;i<bv->nc+j;i++) hh[i] /= PetscRealPart(omega[i]);
367 2897743 : else for (i=0;i<bv->nc+j;i++) hh[i] *= PetscRealPart(omega[i]);
368 204974 : PetscCall(VecRestoreArrayRead(bv->omega,&omega));
369 204974 : if (!h) PetscCall(VecRestoreArray(bv->buffer,&hh));
370 204974 : PetscCall(PetscLogFlops(1.0*(bv->nc+j)));
371 204974 : PetscFunctionReturn(PETSC_SUCCESS);
372 : }
373 :
374 : /*
375 : BV_SquareRoot_Default - Returns the square root of position j (counted after the constraints)
376 : of the coefficients array
377 : */
378 306378 : static inline PetscErrorCode BV_SquareRoot_Default(BV bv,PetscInt j,PetscScalar *h,PetscReal *beta)
379 : {
380 306378 : PetscScalar *hh=h;
381 :
382 306378 : PetscFunctionBegin;
383 306378 : if (!h) PetscCall(VecGetArray(bv->buffer,&hh));
384 306378 : PetscCall(BV_SafeSqrt(bv,hh[bv->nc+j],beta));
385 306378 : if (!h) PetscCall(VecRestoreArray(bv->buffer,&hh));
386 306378 : PetscFunctionReturn(PETSC_SUCCESS);
387 : }
388 :
389 : /*
390 : BV_StoreCoefficients_Default - Copy the contents of the coefficients array to an array dest
391 : provided by the caller (only values from l to j are copied)
392 : */
393 113175 : static inline PetscErrorCode BV_StoreCoefficients_Default(BV bv,PetscInt j,PetscScalar *h,PetscScalar *dest)
394 : {
395 113175 : PetscScalar *hh=h,*a;
396 113175 : PetscInt i;
397 :
398 113175 : PetscFunctionBegin;
399 113175 : if (!h) {
400 84388 : PetscCall(VecGetArray(bv->buffer,&a));
401 84388 : hh = a + j*(bv->nc+bv->m);
402 : }
403 1997640 : for (i=bv->l;i<j;i++) dest[i-bv->l] = hh[bv->nc+i];
404 113175 : if (!h) PetscCall(VecRestoreArray(bv->buffer,&a));
405 113175 : PetscFunctionReturn(PETSC_SUCCESS);
406 : }
407 :
408 : /*
409 : BV_GetEigenvector - retrieves k-th eigenvector from basis vectors V.
410 : The argument eigi is the imaginary part of the corresponding eigenvalue.
411 : */
412 8710 : static inline PetscErrorCode BV_GetEigenvector(BV V,PetscInt k,PetscScalar eigi,Vec Vr,Vec Vi)
413 : {
414 8710 : PetscFunctionBegin;
415 : #if defined(PETSC_USE_COMPLEX)
416 : if (Vr) PetscCall(BVCopyVec(V,k,Vr));
417 : if (Vi) PetscCall(VecSet(Vi,0.0));
418 : #else
419 8710 : if (eigi > 0.0) { /* first value of conjugate pair */
420 183 : if (Vr) PetscCall(BVCopyVec(V,k,Vr));
421 183 : if (Vi) PetscCall(BVCopyVec(V,k+1,Vi));
422 8527 : } else if (eigi < 0.0) { /* second value of conjugate pair */
423 140 : if (Vr) PetscCall(BVCopyVec(V,k-1,Vr));
424 140 : if (Vi) {
425 140 : PetscCall(BVCopyVec(V,k,Vi));
426 140 : PetscCall(VecScale(Vi,-1.0));
427 : }
428 : } else { /* real eigenvalue */
429 8387 : if (Vr) PetscCall(BVCopyVec(V,k,Vr));
430 8387 : if (Vi) PetscCall(VecSet(Vi,0.0));
431 : }
432 : #endif
433 8710 : PetscFunctionReturn(PETSC_SUCCESS);
434 : }
435 :
436 : /*
437 : BV_OrthogonalizeColumn_Safe - this is intended for cases where we know that
438 : the resulting vector is going to be numerically zero, so normalization or
439 : iterative refinement may cause problems in parallel (collective operation
440 : not being called by all processes)
441 : */
442 40 : static inline PetscErrorCode BV_OrthogonalizeColumn_Safe(BV bv,PetscInt j,PetscScalar *H,PetscReal *norm,PetscBool *lindep)
443 : {
444 40 : BVOrthogRefineType orthog_ref;
445 :
446 40 : PetscFunctionBegin;
447 40 : PetscCall(PetscInfo(bv,"Orthogonalizing column %" PetscInt_FMT " without refinement\n",j));
448 40 : orthog_ref = bv->orthog_ref;
449 40 : bv->orthog_ref = BV_ORTHOG_REFINE_NEVER; /* avoid refinement */
450 40 : PetscCall(BVOrthogonalizeColumn(bv,j,H,NULL,NULL));
451 40 : bv->orthog_ref = orthog_ref; /* restore refinement setting */
452 40 : if (norm) *norm = 0.0;
453 40 : if (lindep) *lindep = PETSC_TRUE;
454 40 : PetscFunctionReturn(PETSC_SUCCESS);
455 : }
456 :
457 : /*
458 : BV_SetDefaultLD - set the default value of the leading dimension, based on
459 : the local size.
460 : */
461 13773 : static inline PetscErrorCode BV_SetDefaultLD(BV bv,PetscInt nloc)
462 : {
463 13773 : size_t bytes,align;
464 :
465 13773 : PetscFunctionBegin;
466 13773 : if (bv->ld) { /* set by user */
467 11680 : PetscCheck(bv->ld>=nloc,PetscObjectComm((PetscObject)bv),PETSC_ERR_USER_INPUT,"The leading dimension %" PetscInt_FMT " should be larger or equal to the local number of rows %" PetscInt_FMT,bv->ld,nloc);
468 : } else {
469 2093 : align = PetscMax(PETSC_MEMALIGN,16); /* assume that CUDA requires 16-byte alignment */
470 2093 : bytes = (nloc*sizeof(PetscScalar) + align - 1) & ~(align - 1);
471 2093 : bv->ld = bytes/sizeof(PetscScalar);
472 : }
473 13773 : PetscFunctionReturn(PETSC_SUCCESS);
474 : }
475 :
476 : #if defined(PETSC_HAVE_CUDA)
477 : /*
478 : BV_MatDenseCUDAGetArrayRead - if Q is MATSEQDENSE it will allocate memory on the
479 : GPU and copy the contents; otherwise, calls MatDenseCUDAGetArrayRead()
480 : */
481 : static inline PetscErrorCode BV_MatDenseCUDAGetArrayRead(BV bv,Mat Q,const PetscScalar **d_q)
482 : {
483 : const PetscScalar *q;
484 : PetscInt ldq,mq;
485 : PetscCuBLASInt ldq_=0;
486 : PetscBool matiscuda;
487 :
488 : PetscFunctionBegin;
489 : (void)bv; // avoid unused parameter warning
490 : PetscCall(MatGetSize(Q,NULL,&mq));
491 : PetscCall(MatDenseGetLDA(Q,&ldq));
492 : PetscCall(PetscCuBLASIntCast(ldq,&ldq_));
493 : PetscCall(PetscObjectTypeCompare((PetscObject)Q,MATSEQDENSECUDA,&matiscuda));
494 : if (matiscuda) PetscCall(MatDenseCUDAGetArrayRead(Q,d_q));
495 : else {
496 : PetscCall(MatDenseGetArrayRead(Q,&q));
497 : PetscCallCUDA(cudaMalloc((void**)d_q,ldq*mq*sizeof(PetscScalar)));
498 : PetscCallCUDA(cudaMemcpy((void*)*d_q,q,ldq*mq*sizeof(PetscScalar),cudaMemcpyHostToDevice));
499 : PetscCall(PetscLogCpuToGpu(ldq*mq*sizeof(PetscScalar)));
500 : }
501 : PetscFunctionReturn(PETSC_SUCCESS);
502 : }
503 :
504 : /*
505 : BV_MatDenseCUDARestoreArrayRead - restores the pointer obtained with BV_MatDenseCUDAGetArrayRead(),
506 : freeing the GPU memory in case of MATSEQDENSE
507 : */
508 : static inline PetscErrorCode BV_MatDenseCUDARestoreArrayRead(BV bv,Mat Q,const PetscScalar **d_q)
509 : {
510 : PetscBool matiscuda;
511 :
512 : PetscFunctionBegin;
513 : (void)bv; // avoid unused parameter warning
514 : PetscCall(PetscObjectTypeCompare((PetscObject)Q,MATSEQDENSECUDA,&matiscuda));
515 : if (matiscuda) PetscCall(MatDenseCUDARestoreArrayRead(Q,d_q));
516 : else {
517 : PetscCall(MatDenseRestoreArrayRead(Q,NULL));
518 : PetscCallCUDA(cudaFree((void*)*d_q));
519 : *d_q = NULL;
520 : }
521 : PetscFunctionReturn(PETSC_SUCCESS);
522 : }
523 :
524 : SLEPC_INTERN PetscErrorCode BVMult_BLAS_CUDA(BV,PetscInt,PetscInt,PetscInt,PetscScalar,const PetscScalar*,PetscInt,const PetscScalar*,PetscInt,PetscScalar,PetscScalar*,PetscInt);
525 : SLEPC_INTERN PetscErrorCode BVMultVec_BLAS_CUDA(BV,PetscInt,PetscInt,PetscScalar,const PetscScalar*,PetscInt,const PetscScalar*,PetscScalar,PetscScalar*);
526 : SLEPC_INTERN PetscErrorCode BVMultInPlace_BLAS_CUDA(BV,PetscInt,PetscInt,PetscInt,PetscInt,PetscScalar*,PetscInt,const PetscScalar*,PetscInt,PetscBool);
527 : SLEPC_INTERN PetscErrorCode BVAXPY_BLAS_CUDA(BV,PetscInt,PetscInt,PetscScalar,const PetscScalar*,PetscInt,PetscScalar,PetscScalar*,PetscInt);
528 : SLEPC_INTERN PetscErrorCode BVDot_BLAS_CUDA(BV,PetscInt,PetscInt,PetscInt,const PetscScalar*,PetscInt,const PetscScalar*,PetscInt,PetscScalar*,PetscInt,PetscBool);
529 : SLEPC_INTERN PetscErrorCode BVDotVec_BLAS_CUDA(BV,PetscInt,PetscInt,const PetscScalar*,PetscInt,const PetscScalar*,PetscScalar*,PetscBool);
530 : SLEPC_INTERN PetscErrorCode BVScale_BLAS_CUDA(BV,PetscInt,PetscScalar*,PetscScalar);
531 : SLEPC_INTERN PetscErrorCode BVNorm_BLAS_CUDA(BV,PetscInt,const PetscScalar*,PetscReal*);
532 : SLEPC_INTERN PetscErrorCode BVNormalize_BLAS_CUDA(BV,PetscInt,PetscInt,PetscScalar*,PetscInt,PetscScalar*);
533 :
534 : SLEPC_INTERN PetscErrorCode BV_CleanCoefficients_CUDA(BV,PetscInt,PetscScalar*);
535 : SLEPC_INTERN PetscErrorCode BV_AddCoefficients_CUDA(BV,PetscInt,PetscScalar*,PetscScalar*);
536 : SLEPC_INTERN PetscErrorCode BV_SetValue_CUDA(BV,PetscInt,PetscInt,PetscScalar*,PetscScalar);
537 : SLEPC_INTERN PetscErrorCode BV_SquareSum_CUDA(BV,PetscInt,PetscScalar*,PetscReal*);
538 : SLEPC_INTERN PetscErrorCode BV_ApplySignature_CUDA(BV,PetscInt,PetscScalar*,PetscBool);
539 : SLEPC_INTERN PetscErrorCode BV_SquareRoot_CUDA(BV,PetscInt,PetscScalar*,PetscReal*);
540 : SLEPC_INTERN PetscErrorCode BV_StoreCoefficients_CUDA(BV,PetscInt,PetscScalar*,PetscScalar*);
541 : #define BV_CleanCoefficients(a,b,c) ((a)->cuda?BV_CleanCoefficients_CUDA:BV_CleanCoefficients_Default)((a),(b),(c))
542 : #define BV_AddCoefficients(a,b,c,d) ((a)->cuda?BV_AddCoefficients_CUDA:BV_AddCoefficients_Default)((a),(b),(c),(d))
543 : #define BV_SetValue(a,b,c,d,e) ((a)->cuda?BV_SetValue_CUDA:BV_SetValue_Default)((a),(b),(c),(d),(e))
544 : #define BV_SquareSum(a,b,c,d) ((a)->cuda?BV_SquareSum_CUDA:BV_SquareSum_Default)((a),(b),(c),(d))
545 : #define BV_ApplySignature(a,b,c,d) ((a)->cuda?BV_ApplySignature_CUDA:BV_ApplySignature_Default)((a),(b),(c),(d))
546 : #define BV_SquareRoot(a,b,c,d) ((a)->cuda?BV_SquareRoot_CUDA:BV_SquareRoot_Default)((a),(b),(c),(d))
547 : #define BV_StoreCoefficients(a,b,c,d) ((a)->cuda?BV_StoreCoefficients_CUDA:BV_StoreCoefficients_Default)((a),(b),(c),(d))
548 : #else
549 : #define BV_CleanCoefficients(a,b,c) BV_CleanCoefficients_Default((a),(b),(c))
550 : #define BV_AddCoefficients(a,b,c,d) BV_AddCoefficients_Default((a),(b),(c),(d))
551 : #define BV_SetValue(a,b,c,d,e) BV_SetValue_Default((a),(b),(c),(d),(e))
552 : #define BV_SquareSum(a,b,c,d) BV_SquareSum_Default((a),(b),(c),(d))
553 : #define BV_ApplySignature(a,b,c,d) BV_ApplySignature_Default((a),(b),(c),(d))
554 : #define BV_SquareRoot(a,b,c,d) BV_SquareRoot_Default((a),(b),(c),(d))
555 : #define BV_StoreCoefficients(a,b,c,d) BV_StoreCoefficients_Default((a),(b),(c),(d))
556 : #endif /* PETSC_HAVE_CUDA */
|