LCOV - code coverage report
Current view: top level - home/gitlab-runner/builds/q8svuz_Y/0/slepc/slepc/include/slepc/private - bvimpl.h (source / functions) Hit Total Coverage
Test: SLEPc Lines: 154 156 98.7 %
Date: 2024-05-01 00:51:33 Functions: 16 16 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
       3             :    SLEPc - Scalable Library for Eigenvalue Problem Computations
       4             :    Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
       5             : 
       6             :    This file is part of SLEPc.
       7             :    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
       8             :    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
       9             : */
      10             : 
      11             : #pragma once
      12             : 
      13             : #include <slepcbv.h>
      14             : #include <slepc/private/slepcimpl.h>
      15             : 
      16             : /* SUBMANSEC = BV */
      17             : 
      18             : SLEPC_EXTERN PetscBool BVRegisterAllCalled;
      19             : SLEPC_EXTERN PetscErrorCode BVRegisterAll(void);
      20             : 
      21             : SLEPC_EXTERN PetscLogEvent BV_Create,BV_Copy,BV_Mult,BV_MultVec,BV_MultInPlace,BV_Dot,BV_DotVec,BV_Orthogonalize,BV_OrthogonalizeVec,BV_Scale,BV_Norm,BV_NormVec,BV_Normalize,BV_SetRandom,BV_MatMult,BV_MatMultVec,BV_MatProject,BV_SVDAndRank;
      22             : 
      23             : typedef struct _BVOps *BVOps;
      24             : 
      25             : struct _BVOps {
      26             :   PetscErrorCode (*mult)(BV,PetscScalar,PetscScalar,BV,Mat);
      27             :   PetscErrorCode (*multvec)(BV,PetscScalar,PetscScalar,Vec,PetscScalar*);
      28             :   PetscErrorCode (*multinplace)(BV,Mat,PetscInt,PetscInt);
      29             :   PetscErrorCode (*multinplacetrans)(BV,Mat,PetscInt,PetscInt);
      30             :   PetscErrorCode (*dot)(BV,BV,Mat);
      31             :   PetscErrorCode (*dotvec)(BV,Vec,PetscScalar*);
      32             :   PetscErrorCode (*dotvec_local)(BV,Vec,PetscScalar*);
      33             :   PetscErrorCode (*dotvec_begin)(BV,Vec,PetscScalar*);
      34             :   PetscErrorCode (*dotvec_end)(BV,Vec,PetscScalar*);
      35             :   PetscErrorCode (*scale)(BV,PetscInt,PetscScalar);
      36             :   PetscErrorCode (*norm)(BV,PetscInt,NormType,PetscReal*);
      37             :   PetscErrorCode (*norm_local)(BV,PetscInt,NormType,PetscReal*);
      38             :   PetscErrorCode (*norm_begin)(BV,PetscInt,NormType,PetscReal*);
      39             :   PetscErrorCode (*norm_end)(BV,PetscInt,NormType,PetscReal*);
      40             :   PetscErrorCode (*normalize)(BV,PetscScalar*);
      41             :   PetscErrorCode (*matmult)(BV,Mat,BV);
      42             :   PetscErrorCode (*copy)(BV,BV);
      43             :   PetscErrorCode (*copycolumn)(BV,PetscInt,PetscInt);
      44             :   PetscErrorCode (*resize)(BV,PetscInt,PetscBool);
      45             :   PetscErrorCode (*getcolumn)(BV,PetscInt,Vec*);
      46             :   PetscErrorCode (*restorecolumn)(BV,PetscInt,Vec*);
      47             :   PetscErrorCode (*getarray)(BV,PetscScalar**);
      48             :   PetscErrorCode (*restorearray)(BV,PetscScalar**);
      49             :   PetscErrorCode (*getarrayread)(BV,const PetscScalar**);
      50             :   PetscErrorCode (*restorearrayread)(BV,const PetscScalar**);
      51             :   PetscErrorCode (*restoresplit)(BV,BV*,BV*);
      52             :   PetscErrorCode (*restoresplitrows)(BV,IS,IS,BV*,BV*);
      53             :   PetscErrorCode (*gramschmidt)(BV,PetscInt,Vec,PetscBool*,PetscScalar*,PetscScalar*,PetscReal*,PetscReal*);
      54             :   PetscErrorCode (*getmat)(BV,Mat*);
      55             :   PetscErrorCode (*restoremat)(BV,Mat*);
      56             :   PetscErrorCode (*duplicate)(BV,BV);
      57             :   PetscErrorCode (*create)(BV);
      58             :   PetscErrorCode (*setfromoptions)(BV,PetscOptionItems*);
      59             :   PetscErrorCode (*view)(BV,PetscViewer);
      60             :   PetscErrorCode (*destroy)(BV);
      61             : };
      62             : 
      63             : struct _p_BV {
      64             :   PETSCHEADER(struct _BVOps);
      65             :   /*------------------------- User parameters --------------------------*/
      66             :   PetscLayout        map;          /* layout of columns */
      67             :   VecType            vtype;        /* vector type */
      68             :   PetscInt           n,N;          /* dimensions of vectors (local, global) */
      69             :   PetscInt           m;            /* number of vectors */
      70             :   PetscInt           l;            /* number of leading columns */
      71             :   PetscInt           k;            /* number of active columns */
      72             :   PetscInt           nc;           /* number of constraints */
      73             :   PetscInt           ld;           /* leading dimension */
      74             :   BVOrthogType       orthog_type;  /* the method of vector orthogonalization */
      75             :   BVOrthogRefineType orthog_ref;   /* refinement method */
      76             :   PetscReal          orthog_eta;   /* refinement threshold */
      77             :   BVOrthogBlockType  orthog_block; /* the method of block orthogonalization */
      78             :   Mat                matrix;       /* inner product matrix */
      79             :   PetscBool          indef;        /* matrix is indefinite */
      80             :   BVMatMultType      vmm;          /* version of matmult operation */
      81             :   PetscBool          rrandom;      /* reproducible random vectors */
      82             :   PetscReal          deftol;       /* tolerance for BV_SafeSqrt */
      83             : 
      84             :   /*---------------------- Cached data and workspace -------------------*/
      85             :   Vec                buffer;       /* buffer vector used in orthogonalization */
      86             :   Mat                Abuffer;      /* auxiliary seqdense matrix that wraps the buffer */
      87             :   Vec                Bx;           /* result of matrix times a vector x */
      88             :   PetscObjectId      xid;          /* object id of vector x */
      89             :   PetscObjectState   xstate;       /* state of vector x */
      90             :   Vec                cv[2];        /* column vectors obtained with BVGetColumn() */
      91             :   PetscInt           ci[2];        /* column indices of obtained vectors */
      92             :   PetscObjectState   st[2];        /* state of obtained vectors */
      93             :   PetscObjectId      id[2];        /* object id of obtained vectors */
      94             :   PetscScalar        *h,*c;        /* orthogonalization coefficients */
      95             :   Vec                omega;        /* signature matrix values for indefinite case */
      96             :   PetscBool          defersfo;     /* deferred call to setfromoptions */
      97             :   BV                 cached;       /* cached BV to store result of matrix times BV */
      98             :   PetscObjectState   bvstate;      /* state of BV when BVApplyMatrixBV() was called */
      99             :   BV                 L,R;          /* BV objects obtained with BVGetSplit/Rows() */
     100             :   PetscObjectState   lstate,rstate;/* state of L and R when BVGetSplit/Rows() was called */
     101             :   PetscInt           lsplit;       /* value of l when BVGetSplit() was called (-1 if BVGetSplitRows()) */
     102             :   PetscInt           issplit;      /* !=0 if BV is from split (1=left, 2=right, -1=top, -2=bottom) */
     103             :   BV                 splitparent;  /* my parent if I am a split BV */
     104             :   PetscRandom        rand;         /* random number generator */
     105             :   Mat                Acreate;      /* matrix given at BVCreateFromMat() */
     106             :   Mat                Aget;         /* matrix returned for BVGetMat() */
     107             :   PetscBool          cuda;         /* true if GPU must be used */
     108             :   PetscBool          sfocalled;    /* setfromoptions has been called */
     109             :   PetscScalar        *work;
     110             :   PetscInt           lwork;
     111             :   void               *data;
     112             : };
     113             : 
     114             : /*
     115             :   BV_SafeSqrt - Computes the square root of a scalar value alpha, which is
     116             :   assumed to be z'*B*z. The result is
     117             :     if definite inner product:     res = sqrt(alpha)
     118             :     if indefinite inner product:   res = sgn(alpha)*sqrt(abs(alpha))
     119             : */
     120      370407 : static inline PetscErrorCode BV_SafeSqrt(BV bv,PetscScalar alpha,PetscReal *res)
     121             : {
     122      370407 :   PetscReal      absal,realp;
     123             : 
     124      370407 :   PetscFunctionBegin;
     125      370407 :   absal = PetscAbsScalar(alpha);
     126      370407 :   realp = PetscRealPart(alpha);
     127      370407 :   if (PetscUnlikely(absal<PETSC_MACHINE_EPSILON)) PetscCall(PetscInfo(bv,"Zero norm %g, either the vector is zero or a semi-inner product is being used\n",(double)absal));
     128             : #if defined(PETSC_USE_COMPLEX)
     129             :   PetscCheck(PetscAbsReal(PetscImaginaryPart(alpha))<bv->deftol || PetscAbsReal(PetscImaginaryPart(alpha))/absal<10*bv->deftol,PetscObjectComm((PetscObject)bv),PETSC_ERR_USER_INPUT,"The inner product is not well defined: nonzero imaginary part %g",(double)PetscImaginaryPart(alpha));
     130             : #endif
     131      370407 :   if (PetscUnlikely(bv->indef)) {
     132      105802 :     *res = (realp<0.0)? -PetscSqrtReal(-realp): PetscSqrtReal(realp);
     133             :   } else {
     134      264605 :     PetscCheck(realp>-bv->deftol,PetscObjectComm((PetscObject)bv),PETSC_ERR_USER_INPUT,"The inner product is not well defined: indefinite matrix %g",(double)realp);
     135      264605 :     *res = (realp<0.0)? 0.0: PetscSqrtReal(realp);
     136             :   }
     137      370407 :   PetscFunctionReturn(PETSC_SUCCESS);
     138             : }
     139             : 
     140             : /*
     141             :   BV_IPMatMult - Multiply a vector x by the inner-product matrix, cache the
     142             :   result in Bx.
     143             : */
     144      197651 : static inline PetscErrorCode BV_IPMatMult(BV bv,Vec x)
     145             : {
     146      197651 :   PetscFunctionBegin;
     147      197651 :   if (((PetscObject)x)->id != bv->xid || ((PetscObject)x)->state != bv->xstate) {
     148      191405 :     if (PetscUnlikely(!bv->Bx)) PetscCall(MatCreateVecs(bv->matrix,&bv->Bx,NULL));
     149      191405 :     PetscCall(MatMult(bv->matrix,x,bv->Bx));
     150      191405 :     PetscCall(PetscObjectGetId((PetscObject)x,&bv->xid));
     151      191405 :     PetscCall(PetscObjectStateGet((PetscObject)x,&bv->xstate));
     152             :   }
     153      197651 :   PetscFunctionReturn(PETSC_SUCCESS);
     154             : }
     155             : 
     156             : /*
     157             :   BV_IPMatMultBV - Multiply BV by the inner-product matrix, cache the
     158             :   result internally in bv->cached.
     159             : */
     160         302 : static inline PetscErrorCode BV_IPMatMultBV(BV bv)
     161             : {
     162         302 :   PetscFunctionBegin;
     163         302 :   PetscCall(BVGetCachedBV(bv,&bv->cached));
     164         302 :   if (((PetscObject)bv)->state != bv->bvstate || bv->l != bv->cached->l || bv->k != bv->cached->k) {
     165         302 :     PetscCall(BVSetActiveColumns(bv->cached,bv->l,bv->k));
     166         302 :     if (bv->matrix) PetscCall(BVMatMult(bv,bv->matrix,bv->cached));
     167           0 :     else PetscCall(BVCopy(bv,bv->cached));
     168         302 :     bv->bvstate = ((PetscObject)bv)->state;
     169             :   }
     170         302 :   PetscFunctionReturn(PETSC_SUCCESS);
     171             : }
     172             : 
     173             : /*
     174             :   BV_AllocateCoeffs - Allocate orthogonalization coefficients if not done already.
     175             : */
     176       47062 : static inline PetscErrorCode BV_AllocateCoeffs(BV bv)
     177             : {
     178       47062 :   PetscFunctionBegin;
     179       47062 :   if (!bv->h) PetscCall(PetscMalloc2(bv->nc+bv->m,&bv->h,bv->nc+bv->m,&bv->c));
     180       47062 :   PetscFunctionReturn(PETSC_SUCCESS);
     181             : }
     182             : 
     183             : /*
     184             :   BV_AllocateSignature - Allocate signature coefficients if not done already.
     185             : */
     186      283723 : static inline PetscErrorCode BV_AllocateSignature(BV bv)
     187             : {
     188      283723 :   PetscFunctionBegin;
     189      283723 :   if (bv->indef && !bv->omega) {
     190          90 :     if (bv->cuda) {
     191             : #if defined(PETSC_HAVE_CUDA)
     192             :       PetscCall(VecCreateSeqCUDA(PETSC_COMM_SELF,bv->nc+bv->m,&bv->omega));
     193             : #else
     194           0 :       SETERRQ(PetscObjectComm((PetscObject)bv),PETSC_ERR_PLIB,"Something wrong happened");
     195             : #endif
     196          90 :     } else PetscCall(VecCreateSeq(PETSC_COMM_SELF,bv->nc+bv->m,&bv->omega));
     197          90 :     PetscCall(VecSet(bv->omega,1.0));
     198             :   }
     199      283723 :   PetscFunctionReturn(PETSC_SUCCESS);
     200             : }
     201             : 
     202             : /*
     203             :   BV_SetMatrixDiagonal - sets the inner product matrix for BV as a diagonal matrix
     204             :   with the diagonal specified by vector vomega, using the same matrix type as matrix M
     205             : */
     206          30 : static inline PetscErrorCode BV_SetMatrixDiagonal(BV bv,Vec vomega,Mat M)
     207             : {
     208          30 :   Mat      Omega;
     209          30 :   MatType  Mtype;
     210             : 
     211          30 :   PetscFunctionBegin;
     212          30 :   PetscCall(MatGetType(M,&Mtype));
     213          30 :   PetscCall(MatCreate(PetscObjectComm((PetscObject)bv),&Omega));
     214          30 :   PetscCall(MatSetSizes(Omega,bv->n,bv->n,bv->N,bv->N));
     215          30 :   PetscCall(MatSetType(Omega,Mtype));
     216          30 :   PetscCall(MatDiagonalSet(Omega,vomega,INSERT_VALUES));
     217          30 :   PetscCall(BVSetMatrix(bv,Omega,PETSC_TRUE));
     218          30 :   PetscCall(MatDestroy(&Omega));
     219          30 :   PetscFunctionReturn(PETSC_SUCCESS);
     220             : }
     221             : 
     222             : /*
     223             :   BVAvailableVec: First (0) or second (1) vector available for
     224             :   getcolumn operation (or -1 if both vectors already fetched).
     225             : */
     226             : #define BVAvailableVec (((bv->ci[0]==-bv->nc-1)? 0: (bv->ci[1]==-bv->nc-1)? 1: -1))
     227             : 
     228             : /*
     229             :     Macros to test valid BV arguments
     230             : */
     231             : #if !defined(PETSC_USE_DEBUG)
     232             : 
     233             : #define BVCheckSizes(h,arg) do {(void)(h);} while (0)
     234             : #define BVCheckOp(h,arg,op) do {(void)(h);} while (0)
     235             : 
     236             : #else
     237             : 
     238             : #define BVCheckSizes(h,arg) \
     239             :   do { \
     240             :     PetscCheck((h)->m,PetscObjectComm((PetscObject)(h)),PETSC_ERR_ARG_WRONGSTATE,"BV sizes have not been defined: Parameter #%d",arg); \
     241             :   } while (0)
     242             : 
     243             : #define BVCheckOp(h,arg,op) \
     244             :   do { \
     245             :     PetscCheck((h)->ops->op,PetscObjectComm((PetscObject)(h)),PETSC_ERR_SUP,"Operation not implemented in this BV type: Parameter #%d",arg); \
     246             :   } while (0)
     247             : 
     248             : #endif
     249             : 
     250             : SLEPC_INTERN PetscErrorCode BVView_Vecs(BV,PetscViewer);
     251             : 
     252             : SLEPC_INTERN PetscErrorCode BVAllocateWork_Private(BV,PetscInt);
     253             : 
     254             : SLEPC_INTERN PetscErrorCode BVMult_BLAS_Private(BV,PetscInt,PetscInt,PetscInt,PetscScalar,const PetscScalar*,PetscInt,const PetscScalar*,PetscInt,PetscScalar,PetscScalar*,PetscInt);
     255             : SLEPC_INTERN PetscErrorCode BVMultVec_BLAS_Private(BV,PetscInt,PetscInt,PetscScalar,const PetscScalar*,PetscInt,const PetscScalar*,PetscScalar,PetscScalar*);
     256             : SLEPC_INTERN PetscErrorCode BVMultInPlace_BLAS_Private(BV,PetscInt,PetscInt,PetscInt,PetscInt,PetscScalar*,PetscInt,const PetscScalar*,PetscInt,PetscBool);
     257             : SLEPC_INTERN PetscErrorCode BVMultInPlace_Vecs_Private(BV,PetscInt,PetscInt,PetscInt,Vec*,const PetscScalar*,PetscBool);
     258             : SLEPC_INTERN PetscErrorCode BVAXPY_BLAS_Private(BV,PetscInt,PetscInt,PetscScalar,const PetscScalar*,PetscInt,PetscScalar,PetscScalar*,PetscInt);
     259             : SLEPC_INTERN PetscErrorCode BVDot_BLAS_Private(BV,PetscInt,PetscInt,PetscInt,const PetscScalar*,PetscInt,const PetscScalar*,PetscInt,PetscScalar*,PetscInt,PetscBool);
     260             : SLEPC_INTERN PetscErrorCode BVDotVec_BLAS_Private(BV,PetscInt,PetscInt,const PetscScalar*,PetscInt,const PetscScalar*,PetscScalar*,PetscBool);
     261             : SLEPC_INTERN PetscErrorCode BVScale_BLAS_Private(BV,PetscInt,PetscScalar*,PetscScalar);
     262             : SLEPC_INTERN PetscErrorCode BVNorm_LAPACK_Private(BV,PetscInt,PetscInt,const PetscScalar*,PetscInt,NormType,PetscReal*,PetscBool);
     263             : SLEPC_INTERN PetscErrorCode BVNormalize_LAPACK_Private(BV,PetscInt,PetscInt,const PetscScalar*,PetscInt,PetscScalar*,PetscBool);
     264             : SLEPC_INTERN PetscErrorCode BVGetMat_Default(BV,Mat*);
     265             : SLEPC_INTERN PetscErrorCode BVRestoreMat_Default(BV,Mat*);
     266             : SLEPC_INTERN PetscErrorCode BVMatCholInv_LAPACK_Private(BV,Mat,Mat);
     267             : SLEPC_INTERN PetscErrorCode BVMatTriInv_LAPACK_Private(BV,Mat,Mat);
     268             : SLEPC_INTERN PetscErrorCode BVMatSVQB_LAPACK_Private(BV,Mat,Mat);
     269             : SLEPC_INTERN PetscErrorCode BVOrthogonalize_LAPACK_TSQR(BV,PetscInt,PetscInt,PetscScalar*,PetscInt,PetscScalar*,PetscInt);
     270             : SLEPC_INTERN PetscErrorCode BVOrthogonalize_LAPACK_TSQR_OnlyR(BV,PetscInt,PetscInt,PetscScalar*,PetscInt,PetscScalar*,PetscInt);
     271             : 
     272             : /* reduction operations used in BVOrthogonalize and BVNormalize */
     273             : SLEPC_EXTERN MPI_Op MPIU_TSQR, MPIU_LAPY2;
     274             : SLEPC_EXTERN void MPIAPI SlepcGivensPacked(void*,void*,PetscMPIInt*,MPI_Datatype*);
     275             : SLEPC_EXTERN void MPIAPI SlepcPythag(void*,void*,PetscMPIInt*,MPI_Datatype*);
     276             : 
     277             : /*
     278             :    BV_CleanCoefficients_Default - Sets to zero all entries of column j of the bv buffer
     279             : */
     280      281017 : static inline PetscErrorCode BV_CleanCoefficients_Default(BV bv,PetscInt j,PetscScalar *h)
     281             : {
     282      281017 :   PetscScalar    *hh=h,*a;
     283      281017 :   PetscInt       i;
     284             : 
     285      281017 :   PetscFunctionBegin;
     286      281017 :   if (!h) {
     287      233967 :     PetscCall(VecGetArray(bv->buffer,&a));
     288      233967 :     hh = a + j*(bv->nc+bv->m);
     289             :   }
     290     4146724 :   for (i=0;i<bv->nc+j;i++) hh[i] = 0.0;
     291      281017 :   if (!h) PetscCall(VecRestoreArray(bv->buffer,&a));
     292      281017 :   PetscFunctionReturn(PETSC_SUCCESS);
     293             : }
     294             : 
     295             : /*
     296             :    BV_AddCoefficients_Default - Add the contents of the scratch (0-th column) of the bv buffer
     297             :    into column j of the bv buffer
     298             : */
     299      447995 : static inline PetscErrorCode BV_AddCoefficients_Default(BV bv,PetscInt j,PetscScalar *h,PetscScalar *c)
     300             : {
     301      447995 :   PetscScalar    *hh=h,*cc=c;
     302      447995 :   PetscInt       i;
     303             : 
     304      447995 :   PetscFunctionBegin;
     305      447995 :   if (!h) {
     306      395997 :     PetscCall(VecGetArray(bv->buffer,&cc));
     307      395997 :     hh = cc + j*(bv->nc+bv->m);
     308             :   }
     309     7401604 :   for (i=0;i<bv->nc+j;i++) hh[i] += cc[i];
     310      447995 :   if (!h) PetscCall(VecRestoreArray(bv->buffer,&cc));
     311      447995 :   PetscCall(PetscLogFlops(1.0*(bv->nc+j)));
     312      447995 :   PetscFunctionReturn(PETSC_SUCCESS);
     313             : }
     314             : 
     315             : /*
     316             :    BV_SetValue_Default - Sets value in row j (counted after the constraints) of column k
     317             :    of the coefficients array
     318             : */
     319      321369 : static inline PetscErrorCode BV_SetValue_Default(BV bv,PetscInt j,PetscInt k,PetscScalar *h,PetscScalar value)
     320             : {
     321      321369 :   PetscScalar    *hh=h,*a;
     322             : 
     323      321369 :   PetscFunctionBegin;
     324      321369 :   if (!h) {
     325      319171 :     PetscCall(VecGetArray(bv->buffer,&a));
     326      319171 :     hh = a + k*(bv->nc+bv->m);
     327             :   }
     328      321369 :   hh[bv->nc+j] = value;
     329      321369 :   if (!h) PetscCall(VecRestoreArray(bv->buffer,&a));
     330      321369 :   PetscFunctionReturn(PETSC_SUCCESS);
     331             : }
     332             : 
     333             : /*
     334             :    BV_SquareSum_Default - Returns the value h'*h, where h represents the contents of the
     335             :    coefficients array (up to position j)
     336             : */
     337      305592 : static inline PetscErrorCode BV_SquareSum_Default(BV bv,PetscInt j,PetscScalar *h,PetscReal *sum)
     338             : {
     339      305592 :   PetscScalar    *hh=h;
     340      305592 :   PetscInt       i;
     341             : 
     342      305592 :   PetscFunctionBegin;
     343      305592 :   *sum = 0.0;
     344      305592 :   if (!h) PetscCall(VecGetArray(bv->buffer,&hh));
     345     3978009 :   for (i=0;i<bv->nc+j;i++) *sum += PetscRealPart(hh[i]*PetscConj(hh[i]));
     346      305592 :   if (!h) PetscCall(VecRestoreArray(bv->buffer,&hh));
     347      305592 :   PetscCall(PetscLogFlops(2.0*(bv->nc+j)));
     348      305592 :   PetscFunctionReturn(PETSC_SUCCESS);
     349             : }
     350             : 
     351             : /*
     352             :    BV_ApplySignature_Default - Computes the pointwise product h*omega, where h represents
     353             :    the contents of the coefficients array (up to position j) and omega is the signature;
     354             :    if inverse=TRUE then the operation is h/omega
     355             : */
     356      205138 : static inline PetscErrorCode BV_ApplySignature_Default(BV bv,PetscInt j,PetscScalar *h,PetscBool inverse)
     357             : {
     358      205138 :   PetscScalar       *hh=h;
     359      205138 :   PetscInt          i;
     360      205138 :   const PetscScalar *omega;
     361             : 
     362      205138 :   PetscFunctionBegin;
     363      205138 :   if (PetscUnlikely(!(bv->nc+j))) PetscFunctionReturn(PETSC_SUCCESS);
     364      204974 :   if (!h) PetscCall(VecGetArray(bv->buffer,&hh));
     365      204974 :   PetscCall(VecGetArrayRead(bv->omega,&omega));
     366     3000230 :   if (inverse) for (i=0;i<bv->nc+j;i++) hh[i] /= PetscRealPart(omega[i]);
     367     2897743 :   else for (i=0;i<bv->nc+j;i++) hh[i] *= PetscRealPart(omega[i]);
     368      204974 :   PetscCall(VecRestoreArrayRead(bv->omega,&omega));
     369      204974 :   if (!h) PetscCall(VecRestoreArray(bv->buffer,&hh));
     370      204974 :   PetscCall(PetscLogFlops(1.0*(bv->nc+j)));
     371      204974 :   PetscFunctionReturn(PETSC_SUCCESS);
     372             : }
     373             : 
     374             : /*
     375             :    BV_SquareRoot_Default - Returns the square root of position j (counted after the constraints)
     376             :    of the coefficients array
     377             : */
     378      306378 : static inline PetscErrorCode BV_SquareRoot_Default(BV bv,PetscInt j,PetscScalar *h,PetscReal *beta)
     379             : {
     380      306378 :   PetscScalar    *hh=h;
     381             : 
     382      306378 :   PetscFunctionBegin;
     383      306378 :   if (!h) PetscCall(VecGetArray(bv->buffer,&hh));
     384      306378 :   PetscCall(BV_SafeSqrt(bv,hh[bv->nc+j],beta));
     385      306378 :   if (!h) PetscCall(VecRestoreArray(bv->buffer,&hh));
     386      306378 :   PetscFunctionReturn(PETSC_SUCCESS);
     387             : }
     388             : 
     389             : /*
     390             :    BV_StoreCoefficients_Default - Copy the contents of the coefficients array to an array dest
     391             :    provided by the caller (only values from l to j are copied)
     392             : */
     393      113175 : static inline PetscErrorCode BV_StoreCoefficients_Default(BV bv,PetscInt j,PetscScalar *h,PetscScalar *dest)
     394             : {
     395      113175 :   PetscScalar    *hh=h,*a;
     396      113175 :   PetscInt       i;
     397             : 
     398      113175 :   PetscFunctionBegin;
     399      113175 :   if (!h) {
     400       84388 :     PetscCall(VecGetArray(bv->buffer,&a));
     401       84388 :     hh = a + j*(bv->nc+bv->m);
     402             :   }
     403     1997640 :   for (i=bv->l;i<j;i++) dest[i-bv->l] = hh[bv->nc+i];
     404      113175 :   if (!h) PetscCall(VecRestoreArray(bv->buffer,&a));
     405      113175 :   PetscFunctionReturn(PETSC_SUCCESS);
     406             : }
     407             : 
     408             : /*
     409             :   BV_GetEigenvector - retrieves k-th eigenvector from basis vectors V.
     410             :   The argument eigi is the imaginary part of the corresponding eigenvalue.
     411             : */
     412        8710 : static inline PetscErrorCode BV_GetEigenvector(BV V,PetscInt k,PetscScalar eigi,Vec Vr,Vec Vi)
     413             : {
     414        8710 :   PetscFunctionBegin;
     415             : #if defined(PETSC_USE_COMPLEX)
     416             :   if (Vr) PetscCall(BVCopyVec(V,k,Vr));
     417             :   if (Vi) PetscCall(VecSet(Vi,0.0));
     418             : #else
     419        8710 :   if (eigi > 0.0) { /* first value of conjugate pair */
     420         183 :     if (Vr) PetscCall(BVCopyVec(V,k,Vr));
     421         183 :     if (Vi) PetscCall(BVCopyVec(V,k+1,Vi));
     422        8527 :   } else if (eigi < 0.0) { /* second value of conjugate pair */
     423         140 :     if (Vr) PetscCall(BVCopyVec(V,k-1,Vr));
     424         140 :     if (Vi) {
     425         140 :       PetscCall(BVCopyVec(V,k,Vi));
     426         140 :       PetscCall(VecScale(Vi,-1.0));
     427             :     }
     428             :   } else { /* real eigenvalue */
     429        8387 :     if (Vr) PetscCall(BVCopyVec(V,k,Vr));
     430        8387 :     if (Vi) PetscCall(VecSet(Vi,0.0));
     431             :   }
     432             : #endif
     433        8710 :   PetscFunctionReturn(PETSC_SUCCESS);
     434             : }
     435             : 
     436             : /*
     437             :    BV_OrthogonalizeColumn_Safe - this is intended for cases where we know that
     438             :    the resulting vector is going to be numerically zero, so normalization or
     439             :    iterative refinement may cause problems in parallel (collective operation
     440             :    not being called by all processes)
     441             : */
     442          40 : static inline PetscErrorCode BV_OrthogonalizeColumn_Safe(BV bv,PetscInt j,PetscScalar *H,PetscReal *norm,PetscBool *lindep)
     443             : {
     444          40 :   BVOrthogRefineType orthog_ref;
     445             : 
     446          40 :   PetscFunctionBegin;
     447          40 :   PetscCall(PetscInfo(bv,"Orthogonalizing column %" PetscInt_FMT " without refinement\n",j));
     448          40 :   orthog_ref     = bv->orthog_ref;
     449          40 :   bv->orthog_ref = BV_ORTHOG_REFINE_NEVER;  /* avoid refinement */
     450          40 :   PetscCall(BVOrthogonalizeColumn(bv,j,H,NULL,NULL));
     451          40 :   bv->orthog_ref = orthog_ref;  /* restore refinement setting */
     452          40 :   if (norm)   *norm  = 0.0;
     453          40 :   if (lindep) *lindep = PETSC_TRUE;
     454          40 :   PetscFunctionReturn(PETSC_SUCCESS);
     455             : }
     456             : 
     457             : /*
     458             :    BV_SetDefaultLD - set the default value of the leading dimension, based on
     459             :    the local size.
     460             : */
     461       13773 : static inline PetscErrorCode BV_SetDefaultLD(BV bv,PetscInt nloc)
     462             : {
     463       13773 :   size_t bytes,align;
     464             : 
     465       13773 :   PetscFunctionBegin;
     466       13773 :   if (bv->ld) {   /* set by user */
     467       11680 :     PetscCheck(bv->ld>=nloc,PetscObjectComm((PetscObject)bv),PETSC_ERR_USER_INPUT,"The leading dimension %" PetscInt_FMT " should be larger or equal to the local number of rows %" PetscInt_FMT,bv->ld,nloc);
     468             :   } else {
     469        2093 :     align  = PetscMax(PETSC_MEMALIGN,16);   /* assume that CUDA requires 16-byte alignment */
     470        2093 :     bytes  = (nloc*sizeof(PetscScalar) + align - 1) & ~(align - 1);
     471        2093 :     bv->ld = bytes/sizeof(PetscScalar);
     472             :   }
     473       13773 :   PetscFunctionReturn(PETSC_SUCCESS);
     474             : }
     475             : 
     476             : #if defined(PETSC_HAVE_CUDA)
     477             : /*
     478             :    BV_MatDenseCUDAGetArrayRead - if Q is MATSEQDENSE it will allocate memory on the
     479             :    GPU and copy the contents; otherwise, calls MatDenseCUDAGetArrayRead()
     480             : */
     481             : static inline PetscErrorCode BV_MatDenseCUDAGetArrayRead(BV bv,Mat Q,const PetscScalar **d_q)
     482             : {
     483             :   const PetscScalar *q;
     484             :   PetscInt          ldq,mq;
     485             :   PetscCuBLASInt    ldq_=0;
     486             :   PetscBool         matiscuda;
     487             : 
     488             :   PetscFunctionBegin;
     489             :   (void)bv; // avoid unused parameter warning
     490             :   PetscCall(MatGetSize(Q,NULL,&mq));
     491             :   PetscCall(MatDenseGetLDA(Q,&ldq));
     492             :   PetscCall(PetscCuBLASIntCast(ldq,&ldq_));
     493             :   PetscCall(PetscObjectTypeCompare((PetscObject)Q,MATSEQDENSECUDA,&matiscuda));
     494             :   if (matiscuda) PetscCall(MatDenseCUDAGetArrayRead(Q,d_q));
     495             :   else {
     496             :     PetscCall(MatDenseGetArrayRead(Q,&q));
     497             :     PetscCallCUDA(cudaMalloc((void**)d_q,ldq*mq*sizeof(PetscScalar)));
     498             :     PetscCallCUDA(cudaMemcpy((void*)*d_q,q,ldq*mq*sizeof(PetscScalar),cudaMemcpyHostToDevice));
     499             :     PetscCall(PetscLogCpuToGpu(ldq*mq*sizeof(PetscScalar)));
     500             :   }
     501             :   PetscFunctionReturn(PETSC_SUCCESS);
     502             : }
     503             : 
     504             : /*
     505             :    BV_MatDenseCUDARestoreArrayRead - restores the pointer obtained with BV_MatDenseCUDAGetArrayRead(),
     506             :    freeing the GPU memory in case of MATSEQDENSE
     507             : */
     508             : static inline PetscErrorCode BV_MatDenseCUDARestoreArrayRead(BV bv,Mat Q,const PetscScalar **d_q)
     509             : {
     510             :   PetscBool matiscuda;
     511             : 
     512             :   PetscFunctionBegin;
     513             :   (void)bv; // avoid unused parameter warning
     514             :   PetscCall(PetscObjectTypeCompare((PetscObject)Q,MATSEQDENSECUDA,&matiscuda));
     515             :   if (matiscuda) PetscCall(MatDenseCUDARestoreArrayRead(Q,d_q));
     516             :   else {
     517             :     PetscCall(MatDenseRestoreArrayRead(Q,NULL));
     518             :     PetscCallCUDA(cudaFree((void*)*d_q));
     519             :     *d_q = NULL;
     520             :   }
     521             :   PetscFunctionReturn(PETSC_SUCCESS);
     522             : }
     523             : 
     524             : SLEPC_INTERN PetscErrorCode BVMult_BLAS_CUDA(BV,PetscInt,PetscInt,PetscInt,PetscScalar,const PetscScalar*,PetscInt,const PetscScalar*,PetscInt,PetscScalar,PetscScalar*,PetscInt);
     525             : SLEPC_INTERN PetscErrorCode BVMultVec_BLAS_CUDA(BV,PetscInt,PetscInt,PetscScalar,const PetscScalar*,PetscInt,const PetscScalar*,PetscScalar,PetscScalar*);
     526             : SLEPC_INTERN PetscErrorCode BVMultInPlace_BLAS_CUDA(BV,PetscInt,PetscInt,PetscInt,PetscInt,PetscScalar*,PetscInt,const PetscScalar*,PetscInt,PetscBool);
     527             : SLEPC_INTERN PetscErrorCode BVAXPY_BLAS_CUDA(BV,PetscInt,PetscInt,PetscScalar,const PetscScalar*,PetscInt,PetscScalar,PetscScalar*,PetscInt);
     528             : SLEPC_INTERN PetscErrorCode BVDot_BLAS_CUDA(BV,PetscInt,PetscInt,PetscInt,const PetscScalar*,PetscInt,const PetscScalar*,PetscInt,PetscScalar*,PetscInt,PetscBool);
     529             : SLEPC_INTERN PetscErrorCode BVDotVec_BLAS_CUDA(BV,PetscInt,PetscInt,const PetscScalar*,PetscInt,const PetscScalar*,PetscScalar*,PetscBool);
     530             : SLEPC_INTERN PetscErrorCode BVScale_BLAS_CUDA(BV,PetscInt,PetscScalar*,PetscScalar);
     531             : SLEPC_INTERN PetscErrorCode BVNorm_BLAS_CUDA(BV,PetscInt,const PetscScalar*,PetscReal*);
     532             : SLEPC_INTERN PetscErrorCode BVNormalize_BLAS_CUDA(BV,PetscInt,PetscInt,PetscScalar*,PetscInt,PetscScalar*);
     533             : 
     534             : SLEPC_INTERN PetscErrorCode BV_CleanCoefficients_CUDA(BV,PetscInt,PetscScalar*);
     535             : SLEPC_INTERN PetscErrorCode BV_AddCoefficients_CUDA(BV,PetscInt,PetscScalar*,PetscScalar*);
     536             : SLEPC_INTERN PetscErrorCode BV_SetValue_CUDA(BV,PetscInt,PetscInt,PetscScalar*,PetscScalar);
     537             : SLEPC_INTERN PetscErrorCode BV_SquareSum_CUDA(BV,PetscInt,PetscScalar*,PetscReal*);
     538             : SLEPC_INTERN PetscErrorCode BV_ApplySignature_CUDA(BV,PetscInt,PetscScalar*,PetscBool);
     539             : SLEPC_INTERN PetscErrorCode BV_SquareRoot_CUDA(BV,PetscInt,PetscScalar*,PetscReal*);
     540             : SLEPC_INTERN PetscErrorCode BV_StoreCoefficients_CUDA(BV,PetscInt,PetscScalar*,PetscScalar*);
     541             : #define BV_CleanCoefficients(a,b,c)   ((a)->cuda?BV_CleanCoefficients_CUDA:BV_CleanCoefficients_Default)((a),(b),(c))
     542             : #define BV_AddCoefficients(a,b,c,d)   ((a)->cuda?BV_AddCoefficients_CUDA:BV_AddCoefficients_Default)((a),(b),(c),(d))
     543             : #define BV_SetValue(a,b,c,d,e)        ((a)->cuda?BV_SetValue_CUDA:BV_SetValue_Default)((a),(b),(c),(d),(e))
     544             : #define BV_SquareSum(a,b,c,d)         ((a)->cuda?BV_SquareSum_CUDA:BV_SquareSum_Default)((a),(b),(c),(d))
     545             : #define BV_ApplySignature(a,b,c,d)    ((a)->cuda?BV_ApplySignature_CUDA:BV_ApplySignature_Default)((a),(b),(c),(d))
     546             : #define BV_SquareRoot(a,b,c,d)        ((a)->cuda?BV_SquareRoot_CUDA:BV_SquareRoot_Default)((a),(b),(c),(d))
     547             : #define BV_StoreCoefficients(a,b,c,d) ((a)->cuda?BV_StoreCoefficients_CUDA:BV_StoreCoefficients_Default)((a),(b),(c),(d))
     548             : #else
     549             : #define BV_CleanCoefficients(a,b,c)   BV_CleanCoefficients_Default((a),(b),(c))
     550             : #define BV_AddCoefficients(a,b,c,d)   BV_AddCoefficients_Default((a),(b),(c),(d))
     551             : #define BV_SetValue(a,b,c,d,e)        BV_SetValue_Default((a),(b),(c),(d),(e))
     552             : #define BV_SquareSum(a,b,c,d)         BV_SquareSum_Default((a),(b),(c),(d))
     553             : #define BV_ApplySignature(a,b,c,d)    BV_ApplySignature_Default((a),(b),(c),(d))
     554             : #define BV_SquareRoot(a,b,c,d)        BV_SquareRoot_Default((a),(b),(c),(d))
     555             : #define BV_StoreCoefficients(a,b,c,d) BV_StoreCoefficients_Default((a),(b),(c),(d))
     556             : #endif /* PETSC_HAVE_CUDA */

Generated by: LCOV version 1.14