LCOV - code coverage report
Current view: top level - sys/classes/bv/interface - bvcontour.c (source / functions) Hit Total Coverage
Test: SLEPc Lines: 289 289 100.0 %
Date: 2024-11-21 00:40:22 Functions: 9 9 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
       3             :    SLEPc - Scalable Library for Eigenvalue Problem Computations
       4             :    Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
       5             : 
       6             :    This file is part of SLEPc.
       7             :    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
       8             :    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
       9             : */
      10             : /*
      11             :    BV developer functions needed in contour integral methods
      12             : */
      13             : 
      14             : #include <slepc/private/bvimpl.h>            /*I "slepcbv.h" I*/
      15             : #include <slepcblaslapack.h>
      16             : 
      17             : #define p_id(i) (i*subcomm->n + subcomm->color)
      18             : 
      19             : /*@
      20             :    BVScatter - Scatters the columns of a BV to another BV created in a
      21             :    subcommunicator.
      22             : 
      23             :    Collective
      24             : 
      25             :    Input Parameters:
      26             : +  Vin  - input basis vectors (defined on the whole communicator)
      27             : .  scat - VecScatter object that contains the info for the communication
      28             : -  xdup - an auxiliary vector
      29             : 
      30             :    Output Parameter:
      31             : .  Vout - output basis vectors (defined on the subcommunicator)
      32             : 
      33             :    Notes:
      34             :    Currently implemented as a loop for each the active column, where each
      35             :    column is scattered independently. The vector xdup is defined on the
      36             :    contiguous parent communicator and have enough space to store one
      37             :    duplicate of the original vector per each subcommunicator.
      38             : 
      39             :    Level: developer
      40             : 
      41             : .seealso: BVGetColumn()
      42             : @*/
      43          30 : PetscErrorCode BVScatter(BV Vin,BV Vout,VecScatter scat,Vec xdup)
      44             : {
      45          30 :   PetscInt          i;
      46          30 :   Vec               v,z;
      47          30 :   const PetscScalar *array;
      48             : 
      49          30 :   PetscFunctionBegin;
      50          30 :   PetscValidHeaderSpecific(Vin,BV_CLASSID,1);
      51          30 :   PetscValidHeaderSpecific(Vout,BV_CLASSID,2);
      52          30 :   PetscValidHeaderSpecific(scat,PETSCSF_CLASSID,3);
      53          30 :   PetscValidHeaderSpecific(xdup,VEC_CLASSID,4);
      54          30 :   PetscCall(BVCreateVec(Vout,&z));
      55         424 :   for (i=Vin->l;i<Vin->k;i++) {
      56         394 :     PetscCall(BVGetColumn(Vin,i,&v));
      57         394 :     PetscCall(VecScatterBegin(scat,v,xdup,INSERT_VALUES,SCATTER_FORWARD));
      58         394 :     PetscCall(VecScatterEnd(scat,v,xdup,INSERT_VALUES,SCATTER_FORWARD));
      59         394 :     PetscCall(BVRestoreColumn(Vin,i,&v));
      60         394 :     PetscCall(VecGetArrayRead(xdup,&array));
      61         394 :     PetscCall(VecPlaceArray(z,array));
      62         394 :     PetscCall(BVInsertVec(Vout,i,z));
      63         394 :     PetscCall(VecResetArray(z));
      64         394 :     PetscCall(VecRestoreArrayRead(xdup,&array));
      65             :   }
      66          30 :   PetscCall(VecDestroy(&z));
      67          30 :   PetscFunctionReturn(PETSC_SUCCESS);
      68             : }
      69             : 
      70             : /*@
      71             :    BVSumQuadrature - Computes the sum of terms required in the quadrature
      72             :    rule to approximate the contour integral.
      73             : 
      74             :    Collective
      75             : 
      76             :    Input Parameters:
      77             : +  Y       - input basis vectors
      78             : .  M       - number of moments
      79             : .  L       - block size
      80             : .  L_max   - maximum block size
      81             : .  w       - quadrature weights
      82             : .  zn      - normalized quadrature points
      83             : .  scat    - (optional) VecScatter object to communicate between subcommunicators
      84             : .  subcomm - subcommunicator layout
      85             : .  npoints - number of points to process by the subcommunicator
      86             : -  useconj - whether conjugate points can be used or not
      87             : 
      88             :    Output Parameter:
      89             : .  S       - output basis vectors
      90             : 
      91             :    Notes:
      92             :    This is a generalization of BVMult(). The resulting matrix S consists of M
      93             :    panels of L columns, and the following formula is computed for each panel
      94             :    S_k = sum_j w_j*zn_j^k*Y_j, where Y_j is the j-th panel of Y containing
      95             :    the result of solving T(z_j)^{-1}*X for each integration point j. L_max is
      96             :    the width of the panels in Y.
      97             : 
      98             :    When using subcommunicators, Y is stored in the subcommunicators for a subset
      99             :    of integration points. In that case, the computation is done in the subcomm
     100             :    and then scattered to the whole communicator in S using the VecScatter scat.
     101             :    The value npoints is the number of points to be processed in this subcomm
     102             :    and the flag useconj indicates whether symmetric points can be reused.
     103             : 
     104             :    Level: developer
     105             : 
     106             : .seealso: BVMult(), BVScatter(), BVDotQuadrature(), RGComputeQuadrature(), RGCanUseConjugates()
     107             : @*/
     108          88 : PetscErrorCode BVSumQuadrature(BV S,BV Y,PetscInt M,PetscInt L,PetscInt L_max,PetscScalar *w,PetscScalar *zn,VecScatter scat,PetscSubcomm subcomm,PetscInt npoints,PetscBool useconj)
     109             : {
     110          88 :   PetscInt       i,j,k,nloc;
     111          88 :   Vec            v,sj;
     112          88 :   PetscScalar    *ppk,*pv,one=1.0;
     113             : 
     114          88 :   PetscFunctionBegin;
     115          88 :   PetscValidHeaderSpecific(S,BV_CLASSID,1);
     116          88 :   PetscValidHeaderSpecific(Y,BV_CLASSID,2);
     117          88 :   if (scat) PetscValidHeaderSpecific(scat,PETSCSF_CLASSID,8);
     118             : 
     119          88 :   PetscCall(BVGetSizes(Y,&nloc,NULL,NULL));
     120          88 :   PetscCall(PetscMalloc1(npoints,&ppk));
     121        2116 :   for (i=0;i<npoints;i++) ppk[i] = 1.0;
     122          88 :   PetscCall(BVCreateVec(Y,&v));
     123         716 :   for (k=0;k<M;k++) {
     124        8480 :     for (j=0;j<L;j++) {
     125        7852 :       PetscCall(VecSet(v,0.0));
     126      195852 :       for (i=0;i<npoints;i++) {
     127      188000 :         PetscCall(BVSetActiveColumns(Y,i*L_max+j,i*L_max+j+1));
     128      188000 :         PetscCall(BVMultVec(Y,ppk[i]*w[p_id(i)],1.0,v,&one));
     129             :       }
     130        7852 :       if (PetscUnlikely(useconj)) {
     131        1120 :         PetscCall(VecGetArray(v,&pv));
     132      674656 :         for (i=0;i<nloc;i++) pv[i] = 2.0*PetscRealPart(pv[i]);
     133        1120 :         PetscCall(VecRestoreArray(v,&pv));
     134             :       }
     135        7852 :       PetscCall(BVGetColumn(S,k*L+j,&sj));
     136        7852 :       if (PetscUnlikely(scat)) {
     137        2768 :         PetscCall(VecScatterBegin(scat,v,sj,ADD_VALUES,SCATTER_REVERSE));
     138        2768 :         PetscCall(VecScatterEnd(scat,v,sj,ADD_VALUES,SCATTER_REVERSE));
     139        5084 :       } else PetscCall(VecCopy(v,sj));
     140        7852 :       PetscCall(BVRestoreColumn(S,k*L+j,&sj));
     141             :     }
     142       15108 :     for (i=0;i<npoints;i++) ppk[i] *= zn[p_id(i)];
     143             :   }
     144          88 :   PetscCall(PetscFree(ppk));
     145          88 :   PetscCall(VecDestroy(&v));
     146          88 :   PetscFunctionReturn(PETSC_SUCCESS);
     147             : }
     148             : 
     149             : /*@
     150             :    BVDotQuadrature - Computes the projection terms required in the quadrature
     151             :    rule to approximate the contour integral.
     152             : 
     153             :    Collective
     154             : 
     155             :    Input Parameters:
     156             : +  Y       - first basis vectors
     157             : .  V       - second basis vectors
     158             : .  M       - number of moments
     159             : .  L       - block size
     160             : .  L_max   - maximum block size
     161             : .  w       - quadrature weights
     162             : .  zn      - normalized quadrature points
     163             : .  subcomm - subcommunicator layout
     164             : .  npoints - number of points to process by the subcommunicator
     165             : -  useconj - whether conjugate points can be used or not
     166             : 
     167             :    Output Parameter:
     168             : .  Mu      - computed result
     169             : 
     170             :    Notes:
     171             :    This is a generalization of BVDot(). The resulting matrix Mu consists of M
     172             :    blocks of size LxL (placed horizontally), each of them computed as
     173             :    Mu_k = sum_j w_j*zn_j^k*V'*Y_j, where Y_j is the j-th panel of Y containing
     174             :    the result of solving T(z_j)^{-1}*X for each integration point j. L_max is
     175             :    the width of the panels in Y.
     176             : 
     177             :    When using subcommunicators, Y is stored in the subcommunicators for a subset
     178             :    of integration points. In that case, the computation is done in the subcomm
     179             :    and then the final result is combined via reduction.
     180             :    The value npoints is the number of points to be processed in this subcomm
     181             :    and the flag useconj indicates whether symmetric points can be reused.
     182             : 
     183             :    Level: developer
     184             : 
     185             : .seealso: BVDot(), BVScatter(), BVSumQuadrature(), RGComputeQuadrature(), RGCanUseConjugates()
     186             : @*/
     187          26 : PetscErrorCode BVDotQuadrature(BV Y,BV V,PetscScalar *Mu,PetscInt M,PetscInt L,PetscInt L_max,PetscScalar *w,PetscScalar *zn,PetscSubcomm subcomm,PetscInt npoints,PetscBool useconj)
     188             : {
     189          26 :   PetscMPIInt       sub_size,count;
     190          26 :   PetscInt          i,j,k,s;
     191          26 :   PetscScalar       *temp,*temp2,*ppk,alp;
     192          26 :   Mat               H;
     193          26 :   const PetscScalar *pH;
     194          26 :   MPI_Comm          child,parent;
     195             : 
     196          26 :   PetscFunctionBegin;
     197          26 :   PetscValidHeaderSpecific(Y,BV_CLASSID,1);
     198          26 :   PetscValidHeaderSpecific(V,BV_CLASSID,2);
     199             : 
     200          26 :   PetscCall(PetscSubcommGetChild(subcomm,&child));
     201          26 :   PetscCallMPI(MPI_Comm_size(child,&sub_size));
     202          26 :   PetscCall(PetscMalloc3(npoints*L*(L+1),&temp,2*M*L*L,&temp2,npoints,&ppk));
     203          26 :   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,L,L_max*npoints,NULL,&H));
     204          26 :   PetscCall(PetscArrayzero(temp2,2*M*L*L));
     205          26 :   PetscCall(BVSetActiveColumns(Y,0,L_max*npoints));
     206          26 :   PetscCall(BVSetActiveColumns(V,0,L));
     207          26 :   PetscCall(BVDot(Y,V,H));
     208          26 :   PetscCall(MatDenseGetArrayRead(H,&pH));
     209         718 :   for (i=0;i<npoints;i++) {
     210        8212 :     for (j=0;j<L;j++) {
     211      110144 :       for (k=0;k<L;k++) {
     212      102624 :         temp[k+j*L+i*L*L] = pH[k+j*L+i*L*L_max];
     213             :       }
     214             :     }
     215             :   }
     216          26 :   PetscCall(MatDenseRestoreArrayRead(H,&pH));
     217         718 :   for (i=0;i<npoints;i++) ppk[i] = 1;
     218         354 :   for (k=0;k<2*M;k++) {
     219        3344 :     for (j=0;j<L;j++) {
     220       94408 :       for (i=0;i<npoints;i++) {
     221       91392 :         alp = ppk[i]*w[p_id(i)];
     222     1327104 :         for (s=0;s<L;s++) {
     223     1235712 :           if (!useconj) temp2[s+(j+k*L)*L] += alp*temp[s+(j+i*L)*L];
     224       16896 :           else temp2[s+(j+k*L)*L] += 2.0*PetscRealPart(alp*temp[s+(j+i*L)*L]);
     225             :         }
     226             :       }
     227             :     }
     228        9096 :     for (i=0;i<npoints;i++) ppk[i] *= zn[p_id(i)];
     229             :   }
     230       38802 :   for (i=0;i<2*M*L*L;i++) temp2[i] /= sub_size;
     231          26 :   PetscCall(PetscMPIIntCast(2*M*L*L,&count));
     232          26 :   PetscCall(PetscSubcommGetParent(subcomm,&parent));
     233          78 :   PetscCallMPI(MPIU_Allreduce(temp2,Mu,count,MPIU_SCALAR,MPIU_SUM,parent));
     234          26 :   PetscCall(PetscFree3(temp,temp2,ppk));
     235          26 :   PetscCall(MatDestroy(&H));
     236          26 :   PetscFunctionReturn(PETSC_SUCCESS);
     237             : }
     238             : 
     239             : /*@
     240             :    BVTraceQuadrature - Computes an estimate of the number of eigenvalues
     241             :    inside a region via quantities computed in the quadrature rule of
     242             :    contour integral methods.
     243             : 
     244             :    Collective
     245             : 
     246             :    Input Parameters:
     247             : +  Y       - first basis vectors
     248             : .  V       - second basis vectors
     249             : .  L       - block size
     250             : .  L_max   - maximum block size
     251             : .  w       - quadrature weights
     252             : .  scat    - (optional) VecScatter object to communicate between subcommunicators
     253             : .  subcomm - subcommunicator layout
     254             : .  npoints - number of points to process by the subcommunicator
     255             : -  useconj - whether conjugate points can be used or not
     256             : 
     257             :    Output Parameter:
     258             : .  est_eig - estimated eigenvalue count
     259             : 
     260             :    Notes:
     261             :    This function returns an estimation of the number of eigenvalues in the
     262             :    region, computed as trace(V'*S_0), where S_0 is the first panel of S
     263             :    computed by BVSumQuadrature().
     264             : 
     265             :    When using subcommunicators, Y is stored in the subcommunicators for a subset
     266             :    of integration points. In that case, the computation is done in the subcomm
     267             :    and then scattered to the whole communicator in S using the VecScatter scat.
     268             :    The value npoints is the number of points to be processed in this subcomm
     269             :    and the flag useconj indicates whether symmetric points can be reused.
     270             : 
     271             :    Level: developer
     272             : 
     273             : .seealso: BVScatter(), BVDotQuadrature(), BVSumQuadrature(), RGComputeQuadrature(), RGCanUseConjugates()
     274             : @*/
     275          73 : PetscErrorCode BVTraceQuadrature(BV Y,BV V,PetscInt L,PetscInt L_max,PetscScalar *w,VecScatter scat,PetscSubcomm subcomm,PetscInt npoints,PetscBool useconj,PetscReal *est_eig)
     276             : {
     277          73 :   PetscInt       i,j;
     278          73 :   Vec            y,yall,vj;
     279          73 :   PetscScalar    dot,sum=0.0,one=1.0;
     280             : 
     281          73 :   PetscFunctionBegin;
     282          73 :   PetscValidHeaderSpecific(Y,BV_CLASSID,1);
     283          73 :   PetscValidHeaderSpecific(V,BV_CLASSID,2);
     284          73 :   if (scat) PetscValidHeaderSpecific(scat,PETSCSF_CLASSID,6);
     285             : 
     286          73 :   PetscCall(BVCreateVec(Y,&y));
     287          73 :   PetscCall(BVCreateVec(V,&yall));
     288         952 :   for (j=0;j<L;j++) {
     289         879 :     PetscCall(VecSet(y,0.0));
     290       21003 :     for (i=0;i<npoints;i++) {
     291       20124 :       PetscCall(BVSetActiveColumns(Y,i*L_max+j,i*L_max+j+1));
     292       20124 :       PetscCall(BVMultVec(Y,w[p_id(i)],1.0,y,&one));
     293             :     }
     294         879 :     PetscCall(BVGetColumn(V,j,&vj));
     295         879 :     if (scat) {
     296         394 :       PetscCall(VecScatterBegin(scat,y,yall,ADD_VALUES,SCATTER_REVERSE));
     297         394 :       PetscCall(VecScatterEnd(scat,y,yall,ADD_VALUES,SCATTER_REVERSE));
     298         394 :       PetscCall(VecDot(vj,yall,&dot));
     299         485 :     } else PetscCall(VecDot(vj,y,&dot));
     300         879 :     PetscCall(BVRestoreColumn(V,j,&vj));
     301         879 :     if (useconj) sum += 2.0*PetscRealPart(dot);
     302         755 :     else sum += dot;
     303             :   }
     304          73 :   *est_eig = PetscAbsScalar(sum)/(PetscReal)L;
     305          73 :   PetscCall(VecDestroy(&y));
     306          73 :   PetscCall(VecDestroy(&yall));
     307          73 :   PetscFunctionReturn(PETSC_SUCCESS);
     308             : }
     309             : 
     310          28 : static PetscErrorCode BVSVDAndRank_Refine(BV S,PetscReal delta,PetscScalar *pA,PetscReal *sigma,PetscInt *rank)
     311             : {
     312          28 :   PetscInt       i,j,k,ml=S->k;
     313          28 :   PetscMPIInt    len;
     314          28 :   PetscScalar    *work,*B,*tempB,*sarray,*Q1,*Q2,*temp2,alpha=1.0,beta=0.0;
     315          28 :   PetscBLASInt   l,m,n,lda,ldu,ldvt,lwork,info,ldb,ldc,lds;
     316             : #if defined(PETSC_USE_COMPLEX)
     317          28 :   PetscReal      *rwork;
     318             : #endif
     319             : 
     320          28 :   PetscFunctionBegin;
     321          28 :   PetscCall(PetscBLASIntCast(S->ld,&lds));
     322          28 :   PetscCall(BVGetArray(S,&sarray));
     323          28 :   PetscCall(PetscMalloc6(ml*ml,&temp2,S->n*ml,&Q1,S->n*ml,&Q2,ml*ml,&B,ml*ml,&tempB,5*ml,&work));
     324             : #if defined(PETSC_USE_COMPLEX)
     325          28 :   PetscCall(PetscMalloc1(5*ml,&rwork));
     326             : #endif
     327          28 :   PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
     328             : 
     329          28 :   PetscCall(PetscArrayzero(B,ml*ml));
     330        3104 :   for (i=0;i<ml;i++) B[i*ml+i]=1;
     331             : 
     332          84 :   for (k=0;k<2;k++) {
     333          56 :     PetscCall(PetscBLASIntCast(S->n,&m));
     334          56 :     PetscCall(PetscBLASIntCast(ml,&l));
     335          56 :     n = l; lda = m; ldb = m; ldc = l;
     336          56 :     if (!k) PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&l,&n,&m,&alpha,sarray,&lds,sarray,&lds,&beta,pA,&ldc));
     337          28 :     else PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&l,&n,&m,&alpha,Q1,&lda,Q1,&ldb,&beta,pA,&ldc));
     338          56 :     PetscCall(PetscArrayzero(temp2,ml*ml));
     339          56 :     PetscCall(PetscMPIIntCast(ml*ml,&len));
     340         168 :     PetscCallMPI(MPIU_Allreduce(pA,temp2,len,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)S)));
     341             : 
     342          56 :     PetscCall(PetscBLASIntCast(ml,&m));
     343          56 :     n = m; lda = m; lwork = 5*m, ldu = 1; ldvt = 1;
     344             : #if defined(PETSC_USE_COMPLEX)
     345          56 :     PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&m,&n,temp2,&lda,sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,rwork,&info));
     346             : #else
     347             :     PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&m,&n,temp2,&lda,sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,&info));
     348             : #endif
     349          56 :     SlepcCheckLapackInfo("gesvd",info);
     350             : 
     351          56 :     PetscCall(PetscBLASIntCast(S->n,&l));
     352          56 :     PetscCall(PetscBLASIntCast(ml,&n));
     353          56 :     m = n; lda = l; ldb = m; ldc = l;
     354          56 :     if (!k) PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&l,&n,&m,&alpha,sarray,&lds,temp2,&ldb,&beta,Q1,&ldc));
     355          28 :     else PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&l,&n,&m,&alpha,Q1,&lda,temp2,&ldb,&beta,Q2,&ldc));
     356             : 
     357          56 :     PetscCall(PetscBLASIntCast(ml,&l));
     358          56 :     m = l; n = l; lda = l; ldb = m; ldc = l;
     359          56 :     PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&l,&n,&m,&alpha,B,&lda,temp2,&ldb,&beta,tempB,&ldc));
     360        6208 :     for (i=0;i<ml;i++) {
     361        6152 :       sigma[i] = PetscSqrtReal(sigma[i]);
     362     2475624 :       for (j=0;j<S->n;j++) {
     363     2469472 :         if (k%2) Q2[j+i*S->n] /= sigma[i];
     364     1234736 :         else Q1[j+i*S->n] /= sigma[i];
     365             :       }
     366      753064 :       for (j=0;j<ml;j++) B[j+i*ml] = tempB[j+i*ml]*sigma[i];
     367             :     }
     368             :   }
     369             : 
     370          28 :   PetscCall(PetscBLASIntCast(ml,&m));
     371          28 :   n = m; lda = m; ldu=1; ldvt=1;
     372             : #if defined(PETSC_USE_COMPLEX)
     373          28 :   PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&m,&n,B,&lda,sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,rwork,&info));
     374             : #else
     375             :   PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&m,&n,B,&lda,sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,&info));
     376             : #endif
     377          28 :   SlepcCheckLapackInfo("gesvd",info);
     378             : 
     379          28 :   PetscCall(PetscBLASIntCast(S->n,&l));
     380          28 :   PetscCall(PetscBLASIntCast(ml,&n));
     381          28 :   m = n; lda = l; ldb = m;
     382          28 :   if (k%2) PetscCallBLAS("BLASgemm",BLASgemm_("N","T",&l,&n,&m,&alpha,Q1,&lda,B,&ldb,&beta,sarray,&lds));
     383          28 :   else PetscCallBLAS("BLASgemm",BLASgemm_("N","T",&l,&n,&m,&alpha,Q2,&lda,B,&ldb,&beta,sarray,&lds));
     384             : 
     385          28 :   PetscCall(PetscFPTrapPop());
     386          28 :   PetscCall(BVRestoreArray(S,&sarray));
     387             : 
     388          28 :   if (rank) {
     389          28 :     (*rank) = 0;
     390        3104 :     for (i=0;i<ml;i++) {
     391        6152 :       if (sigma[i]/PetscMax(sigma[0],1.0)>delta) (*rank)++;
     392             :     }
     393             :   }
     394          28 :   PetscCall(PetscFree6(temp2,Q1,Q2,B,tempB,work));
     395             : #if defined(PETSC_USE_COMPLEX)
     396          28 :   PetscCall(PetscFree(rwork));
     397             : #endif
     398          28 :   PetscFunctionReturn(PETSC_SUCCESS);
     399             : }
     400             : 
     401          34 : static PetscErrorCode BVSVDAndRank_QR(BV S,PetscReal delta,PetscScalar *pA,PetscReal *sigma,PetscInt *rank)
     402             : {
     403          34 :   PetscInt       i,n,ml=S->k;
     404          34 :   PetscBLASInt   m,lda,lwork,info;
     405          34 :   PetscScalar    *work;
     406          34 :   PetscReal      *rwork;
     407          34 :   Mat            A;
     408          34 :   Vec            v;
     409             : 
     410          34 :   PetscFunctionBegin;
     411             :   /* Compute QR factorizaton of S */
     412          34 :   PetscCall(BVGetSizes(S,NULL,&n,NULL));
     413          34 :   n = PetscMin(n,ml);
     414          34 :   PetscCall(BVSetActiveColumns(S,0,n));
     415          34 :   PetscCall(PetscArrayzero(pA,ml*n));
     416          34 :   PetscCall(MatCreateDense(PETSC_COMM_SELF,n,n,PETSC_DECIDE,PETSC_DECIDE,pA,&A));
     417          34 :   PetscCall(BVOrthogonalize(S,A));
     418          34 :   if (n<ml) {
     419             :     /* the rest of the factorization */
     420         533 :     for (i=n;i<ml;i++) {
     421         522 :       PetscCall(BVGetColumn(S,i,&v));
     422         522 :       PetscCall(BVOrthogonalizeVec(S,v,pA+i*n,NULL,NULL));
     423         522 :       PetscCall(BVRestoreColumn(S,i,&v));
     424             :     }
     425             :   }
     426          34 :   PetscCall(PetscBLASIntCast(n,&lda));
     427          34 :   PetscCall(PetscBLASIntCast(ml,&m));
     428          34 :   PetscCall(PetscMalloc2(5*ml,&work,5*ml,&rwork));
     429          34 :   lwork = 5*m;
     430          34 :   PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
     431             : #if !defined (PETSC_USE_COMPLEX)
     432             :   PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&lda,&m,pA,&lda,sigma,NULL,&lda,NULL,&lda,work,&lwork,&info));
     433             : #else
     434          34 :   PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&lda,&m,pA,&lda,sigma,NULL,&lda,NULL,&lda,work,&lwork,rwork,&info));
     435             : #endif
     436          34 :   SlepcCheckLapackInfo("gesvd",info);
     437          34 :   PetscCall(PetscFPTrapPop());
     438          34 :   *rank = 0;
     439        2848 :   for (i=0;i<n;i++) {
     440        5628 :     if (sigma[i]/PetscMax(sigma[0],1)>delta) (*rank)++;
     441             :   }
     442             :   /* n first columns of A have the left singular vectors */
     443          34 :   PetscCall(BVMultInPlace(S,A,0,*rank));
     444          34 :   PetscCall(PetscFree2(work,rwork));
     445          34 :   PetscCall(MatDestroy(&A));
     446          34 :   PetscFunctionReturn(PETSC_SUCCESS);
     447             : }
     448             : 
     449           9 : static PetscErrorCode BVSVDAndRank_QR_CAA(BV S,PetscInt M,PetscInt L,PetscReal delta,PetscScalar *pA,PetscReal *sigma,PetscInt *rank)
     450             : {
     451           9 :   PetscInt       i,j,n,ml=S->k;
     452           9 :   PetscBLASInt   m,k_,lda,lwork,info;
     453           9 :   PetscScalar    *work,*T,*U,*R,sone=1.0,zero=0.0;
     454           9 :   PetscReal      *rwork;
     455           9 :   Mat            A;
     456             : 
     457           9 :   PetscFunctionBegin;
     458             :   /* Compute QR factorizaton of S */
     459           9 :   PetscCall(BVGetSizes(S,NULL,&n,NULL));
     460           9 :   PetscCheck(n>=ml,PetscObjectComm((PetscObject)S),PETSC_ERR_SUP,"The QR_CAA method does not support problem size n < m*L");
     461           9 :   PetscCall(BVSetActiveColumns(S,0,ml));
     462           9 :   PetscCall(PetscArrayzero(pA,ml*ml));
     463           9 :   PetscCall(MatCreateDense(PETSC_COMM_SELF,ml,ml,PETSC_DECIDE,PETSC_DECIDE,pA,&A));
     464           9 :   PetscCall(BVOrthogonalize(S,A));
     465           9 :   PetscCall(MatDestroy(&A));
     466             : 
     467             :   /* SVD of first (M-1)*L diagonal block */
     468           9 :   PetscCall(PetscBLASIntCast((M-1)*L,&m));
     469           9 :   PetscCall(PetscMalloc5(m*m,&T,m*m,&R,m*m,&U,5*ml,&work,5*ml,&rwork));
     470         305 :   for (j=0;j<m;j++) PetscCall(PetscArraycpy(R+j*m,pA+j*ml,m));
     471           9 :   lwork = 5*m;
     472           9 :   PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
     473             : #if !defined (PETSC_USE_COMPLEX)
     474             :   PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","O",&m,&m,R,&m,sigma,U,&m,NULL,&m,work,&lwork,&info));
     475             : #else
     476           9 :   PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","O",&m,&m,R,&m,sigma,U,&m,NULL,&m,work,&lwork,rwork,&info));
     477             : #endif
     478           9 :   SlepcCheckLapackInfo("gesvd",info);
     479           9 :   PetscCall(PetscFPTrapPop());
     480           9 :   *rank = 0;
     481         305 :   for (i=0;i<m;i++) {
     482         592 :     if (sigma[i]/PetscMax(sigma[0],1)>delta) (*rank)++;
     483             :   }
     484           9 :   PetscCall(MatCreateDense(PETSC_COMM_SELF,m,m,PETSC_DECIDE,PETSC_DECIDE,U,&A));
     485           9 :   PetscCall(BVSetActiveColumns(S,0,m));
     486           9 :   PetscCall(BVMultInPlace(S,A,0,*rank));
     487           9 :   PetscCall(MatDestroy(&A));
     488             :   /* Projected linear system */
     489             :   /* m first columns of A have the right singular vectors */
     490           9 :   PetscCall(PetscBLASIntCast(*rank,&k_));
     491           9 :   PetscCall(PetscBLASIntCast(ml,&lda));
     492           9 :   PetscCallBLAS("BLASgemm",BLASgemm_("N","C",&m,&k_,&m,&sone,pA+L*lda,&lda,R,&m,&zero,T,&m));
     493           9 :   PetscCall(PetscArrayzero(pA,ml*ml));
     494           9 :   PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&m,&sone,U,&m,T,&m,&zero,pA,&k_));
     495         793 :   for (j=0;j<k_;j++) for (i=0;i<k_;i++) pA[j*k_+i] /= sigma[j];
     496           9 :   PetscCall(PetscFree5(T,R,U,work,rwork));
     497           9 :   PetscFunctionReturn(PETSC_SUCCESS);
     498             : }
     499             : 
     500             : /*@
     501             :    BVSVDAndRank - Compute the SVD (left singular vectors only, and singular
     502             :    values) and determine the numerical rank according to a tolerance.
     503             : 
     504             :    Collective
     505             : 
     506             :    Input Parameters:
     507             : +  S     - the basis vectors
     508             : .  m     - the moment degree
     509             : .  l     - the block size
     510             : .  delta - the tolerance used to determine the rank
     511             : -  meth  - the method to be used
     512             : 
     513             :    Output Parameters:
     514             : +  A     - workspace, on output contains relevant values in the CAA method
     515             : .  sigma - computed singular values
     516             : -  rank  - estimated rank (optional)
     517             : 
     518             :    Notes:
     519             :    This function computes [U,Sigma,V] = svd(S) and replaces S with U.
     520             :    The current implementation computes this via S'*S, and it may include
     521             :    some kind of iterative refinement to improve accuracy in some cases.
     522             : 
     523             :    The parameters m and l refer to the moment and block size of contour
     524             :    integral methods. All columns up to m*l are modified, and the active
     525             :    columns are set to 0..m*l.
     526             : 
     527             :    The method is one of BV_SVD_METHOD_REFINE, BV_SVD_METHOD_QR, BV_SVD_METHOD_QR_CAA.
     528             : 
     529             :    The A workspace should be m*l*m*l in size.
     530             : 
     531             :    Once the decomposition is computed, the numerical rank is estimated
     532             :    by counting the number of singular values that are larger than the
     533             :    tolerance delta, relative to the first singular value.
     534             : 
     535             :    Level: developer
     536             : 
     537             : .seealso: BVSetActiveColumns()
     538             : @*/
     539          71 : PetscErrorCode BVSVDAndRank(BV S,PetscInt m,PetscInt l,PetscReal delta,BVSVDMethod meth,PetscScalar *A,PetscReal *sigma,PetscInt *rank)
     540             : {
     541          71 :   PetscFunctionBegin;
     542          71 :   PetscValidHeaderSpecific(S,BV_CLASSID,1);
     543         213 :   PetscValidLogicalCollectiveInt(S,m,2);
     544         213 :   PetscValidLogicalCollectiveInt(S,l,3);
     545         213 :   PetscValidLogicalCollectiveReal(S,delta,4);
     546         213 :   PetscValidLogicalCollectiveEnum(S,meth,5);
     547          71 :   PetscAssertPointer(A,6);
     548          71 :   PetscAssertPointer(sigma,7);
     549          71 :   PetscAssertPointer(rank,8);
     550             : 
     551          71 :   PetscCall(PetscLogEventBegin(BV_SVDAndRank,S,0,0,0));
     552          71 :   PetscCall(BVSetActiveColumns(S,0,m*l));
     553          71 :   switch (meth) {
     554          28 :     case BV_SVD_METHOD_REFINE:
     555          28 :       PetscCall(BVSVDAndRank_Refine(S,delta,A,sigma,rank));
     556             :       break;
     557          34 :     case BV_SVD_METHOD_QR:
     558          34 :       PetscCall(BVSVDAndRank_QR(S,delta,A,sigma,rank));
     559             :       break;
     560           9 :     case BV_SVD_METHOD_QR_CAA:
     561           9 :       PetscCall(BVSVDAndRank_QR_CAA(S,m,l,delta,A,sigma,rank));
     562             :       break;
     563             :   }
     564          71 :   PetscCall(PetscLogEventEnd(BV_SVDAndRank,S,0,0,0));
     565          71 :   PetscFunctionReturn(PETSC_SUCCESS);
     566             : }
     567             : 
     568             : /*@
     569             :    BVCISSResizeBases - Resize the bases involved in CISS solvers when the L grows.
     570             : 
     571             :    Logically Collective
     572             : 
     573             :    Input Parameters:
     574             : +  S      - basis of L*M columns
     575             : .  V      - basis of L columns (may be associated to subcommunicators)
     576             : .  Y      - basis of npoints*L columns
     577             : .  Lold   - old value of L
     578             : .  Lnew   - new value of L
     579             : .  M      - the moment size
     580             : -  npoints - number of integration points
     581             : 
     582             :    Level: developer
     583             : 
     584             : .seealso: BVResize()
     585             : @*/
     586           7 : PetscErrorCode BVCISSResizeBases(BV S,BV V,BV Y,PetscInt Lold,PetscInt Lnew,PetscInt M,PetscInt npoints)
     587             : {
     588           7 :   PetscInt       i,j;
     589             : 
     590           7 :   PetscFunctionBegin;
     591           7 :   PetscValidHeaderSpecific(S,BV_CLASSID,1);
     592           7 :   PetscValidHeaderSpecific(V,BV_CLASSID,2);
     593           7 :   PetscValidHeaderSpecific(Y,BV_CLASSID,3);
     594          21 :   PetscValidLogicalCollectiveInt(S,Lold,4);
     595          21 :   PetscValidLogicalCollectiveInt(S,Lnew,5);
     596          21 :   PetscValidLogicalCollectiveInt(S,M,6);
     597          21 :   PetscValidLogicalCollectiveInt(S,npoints,7);
     598             : 
     599           7 :   PetscCall(BVResize(S,Lnew*M,PETSC_FALSE));
     600           7 :   PetscCall(BVResize(V,Lnew,PETSC_TRUE));
     601           7 :   PetscCall(BVResize(Y,Lnew*npoints,PETSC_TRUE));
     602             :   /* columns of Y are interleaved */
     603         215 :   for (i=npoints-1;i>=0;i--) {
     604        1600 :     for (j=Lold-1;j>=0;j--) PetscCall(BVCopyColumn(Y,i*Lold+j,i*Lnew+j));
     605             :   }
     606           7 :   PetscFunctionReturn(PETSC_SUCCESS);
     607             : }

Generated by: LCOV version 1.14