LCOV - code coverage report
Current view: top level - sys/classes/bv/interface - bvcontour.c (source / functions) Hit Total Coverage
Test: SLEPc Lines: 163 286 57.0 %
Date: 2024-12-18 00:42:09 Functions: 5 9 55.6 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
       3             :    SLEPc - Scalable Library for Eigenvalue Problem Computations
       4             :    Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
       5             : 
       6             :    This file is part of SLEPc.
       7             :    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
       8             :    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
       9             : */
      10             : /*
      11             :    BV developer functions needed in contour integral methods
      12             : */
      13             : 
      14             : #include <slepc/private/bvimpl.h>            /*I "slepcbv.h" I*/
      15             : #include <slepcblaslapack.h>
      16             : 
      17             : #define p_id(i) (i*subcomm->n + subcomm->color)
      18             : 
      19             : /*@
      20             :    BVScatter - Scatters the columns of a BV to another BV created in a
      21             :    subcommunicator.
      22             : 
      23             :    Collective
      24             : 
      25             :    Input Parameters:
      26             : +  Vin  - input basis vectors (defined on the whole communicator)
      27             : .  scat - VecScatter object that contains the info for the communication
      28             : -  xdup - an auxiliary vector
      29             : 
      30             :    Output Parameter:
      31             : .  Vout - output basis vectors (defined on the subcommunicator)
      32             : 
      33             :    Notes:
      34             :    Currently implemented as a loop for each the active column, where each
      35             :    column is scattered independently. The vector xdup is defined on the
      36             :    contiguous parent communicator and have enough space to store one
      37             :    duplicate of the original vector per each subcommunicator.
      38             : 
      39             :    Level: developer
      40             : 
      41             : .seealso: BVGetColumn()
      42             : @*/
      43           8 : PetscErrorCode BVScatter(BV Vin,BV Vout,VecScatter scat,Vec xdup)
      44             : {
      45           8 :   PetscInt          i;
      46           8 :   Vec               v,z;
      47           8 :   const PetscScalar *array;
      48             : 
      49           8 :   PetscFunctionBegin;
      50           8 :   PetscValidHeaderSpecific(Vin,BV_CLASSID,1);
      51           8 :   PetscValidHeaderSpecific(Vout,BV_CLASSID,2);
      52           8 :   PetscValidHeaderSpecific(scat,PETSCSF_CLASSID,3);
      53           8 :   PetscValidHeaderSpecific(xdup,VEC_CLASSID,4);
      54           8 :   PetscCall(BVCreateVec(Vout,&z));
      55         128 :   for (i=Vin->l;i<Vin->k;i++) {
      56         120 :     PetscCall(BVGetColumn(Vin,i,&v));
      57         120 :     PetscCall(VecScatterBegin(scat,v,xdup,INSERT_VALUES,SCATTER_FORWARD));
      58         120 :     PetscCall(VecScatterEnd(scat,v,xdup,INSERT_VALUES,SCATTER_FORWARD));
      59         120 :     PetscCall(BVRestoreColumn(Vin,i,&v));
      60         120 :     PetscCall(VecGetArrayRead(xdup,&array));
      61         120 :     PetscCall(VecPlaceArray(z,array));
      62         120 :     PetscCall(BVInsertVec(Vout,i,z));
      63         120 :     PetscCall(VecResetArray(z));
      64         120 :     PetscCall(VecRestoreArrayRead(xdup,&array));
      65             :   }
      66           8 :   PetscCall(VecDestroy(&z));
      67           8 :   PetscFunctionReturn(PETSC_SUCCESS);
      68             : }
      69             : 
      70             : /*@
      71             :    BVSumQuadrature - Computes the sum of terms required in the quadrature
      72             :    rule to approximate the contour integral.
      73             : 
      74             :    Collective
      75             : 
      76             :    Input Parameters:
      77             : +  Y       - input basis vectors
      78             : .  M       - number of moments
      79             : .  L       - block size
      80             : .  L_max   - maximum block size
      81             : .  w       - quadrature weights
      82             : .  zn      - normalized quadrature points
      83             : .  scat    - (optional) VecScatter object to communicate between subcommunicators
      84             : .  subcomm - subcommunicator layout
      85             : .  npoints - number of points to process by the subcommunicator
      86             : -  useconj - whether conjugate points can be used or not
      87             : 
      88             :    Output Parameter:
      89             : .  S       - output basis vectors
      90             : 
      91             :    Notes:
      92             :    This is a generalization of BVMult(). The resulting matrix S consists of M
      93             :    panels of L columns, and the following formula is computed for each panel
      94             :    S_k = sum_j w_j*zn_j^k*Y_j, where Y_j is the j-th panel of Y containing
      95             :    the result of solving T(z_j)^{-1}*X for each integration point j. L_max is
      96             :    the width of the panels in Y.
      97             : 
      98             :    When using subcommunicators, Y is stored in the subcommunicators for a subset
      99             :    of integration points. In that case, the computation is done in the subcomm
     100             :    and then scattered to the whole communicator in S using the VecScatter scat.
     101             :    The value npoints is the number of points to be processed in this subcomm
     102             :    and the flag useconj indicates whether symmetric points can be reused.
     103             : 
     104             :    Level: developer
     105             : 
     106             : .seealso: BVMult(), BVScatter(), BVDotQuadrature(), RGComputeQuadrature(), RGCanUseConjugates()
     107             : @*/
     108          26 : PetscErrorCode BVSumQuadrature(BV S,BV Y,PetscInt M,PetscInt L,PetscInt L_max,PetscScalar *w,PetscScalar *zn,VecScatter scat,PetscSubcomm subcomm,PetscInt npoints,PetscBool useconj)
     109             : {
     110          26 :   PetscInt       i,j,k,nloc;
     111          26 :   Vec            v,sj;
     112          26 :   PetscScalar    *ppk,*pv,one=1.0;
     113             : 
     114          26 :   PetscFunctionBegin;
     115          26 :   PetscValidHeaderSpecific(S,BV_CLASSID,1);
     116          26 :   PetscValidHeaderSpecific(Y,BV_CLASSID,2);
     117          26 :   if (scat) PetscValidHeaderSpecific(scat,PETSCSF_CLASSID,8);
     118             : 
     119          26 :   PetscCall(BVGetSizes(Y,&nloc,NULL,NULL));
     120          26 :   PetscCall(PetscMalloc1(npoints,&ppk));
     121         678 :   for (i=0;i<npoints;i++) ppk[i] = 1.0;
     122          26 :   PetscCall(BVCreateVec(Y,&v));
     123         230 :   for (k=0;k<M;k++) {
     124        3128 :     for (j=0;j<L;j++) {
     125        2924 :       PetscCall(VecSet(v,0.0));
     126       74668 :       for (i=0;i<npoints;i++) {
     127       71744 :         PetscCall(BVSetActiveColumns(Y,i*L_max+j,i*L_max+j+1));
     128       71744 :         PetscCall(BVMultVec(Y,ppk[i]*w[p_id(i)],1.0,v,&one));
     129             :       }
     130        2924 :       if (PetscUnlikely(useconj)) {
     131           0 :         PetscCall(VecGetArray(v,&pv));
     132           0 :         for (i=0;i<nloc;i++) pv[i] = 2.0*PetscRealPart(pv[i]);
     133           0 :         PetscCall(VecRestoreArray(v,&pv));
     134             :       }
     135        2924 :       PetscCall(BVGetColumn(S,k*L+j,&sj));
     136        2924 :       if (PetscUnlikely(scat)) {
     137         960 :         PetscCall(VecScatterBegin(scat,v,sj,ADD_VALUES,SCATTER_REVERSE));
     138         960 :         PetscCall(VecScatterEnd(scat,v,sj,ADD_VALUES,SCATTER_REVERSE));
     139        1964 :       } else PetscCall(VecCopy(v,sj));
     140        2924 :       PetscCall(BVRestoreColumn(S,k*L+j,&sj));
     141             :     }
     142        5292 :     for (i=0;i<npoints;i++) ppk[i] *= zn[p_id(i)];
     143             :   }
     144          26 :   PetscCall(PetscFree(ppk));
     145          26 :   PetscCall(VecDestroy(&v));
     146          26 :   PetscFunctionReturn(PETSC_SUCCESS);
     147             : }
     148             : 
     149             : /*@
     150             :    BVDotQuadrature - Computes the projection terms required in the quadrature
     151             :    rule to approximate the contour integral.
     152             : 
     153             :    Collective
     154             : 
     155             :    Input Parameters:
     156             : +  Y       - first basis vectors
     157             : .  V       - second basis vectors
     158             : .  M       - number of moments
     159             : .  L       - block size
     160             : .  L_max   - maximum block size
     161             : .  w       - quadrature weights
     162             : .  zn      - normalized quadrature points
     163             : .  subcomm - subcommunicator layout
     164             : .  npoints - number of points to process by the subcommunicator
     165             : -  useconj - whether conjugate points can be used or not
     166             : 
     167             :    Output Parameter:
     168             : .  Mu      - computed result
     169             : 
     170             :    Notes:
     171             :    This is a generalization of BVDot(). The resulting matrix Mu consists of M
     172             :    blocks of size LxL (placed horizontally), each of them computed as
     173             :    Mu_k = sum_j w_j*zn_j^k*V'*Y_j, where Y_j is the j-th panel of Y containing
     174             :    the result of solving T(z_j)^{-1}*X for each integration point j. L_max is
     175             :    the width of the panels in Y.
     176             : 
     177             :    When using subcommunicators, Y is stored in the subcommunicators for a subset
     178             :    of integration points. In that case, the computation is done in the subcomm
     179             :    and then the final result is combined via reduction.
     180             :    The value npoints is the number of points to be processed in this subcomm
     181             :    and the flag useconj indicates whether symmetric points can be reused.
     182             : 
     183             :    Level: developer
     184             : 
     185             : .seealso: BVDot(), BVScatter(), BVSumQuadrature(), RGComputeQuadrature(), RGCanUseConjugates()
     186             : @*/
     187           6 : PetscErrorCode BVDotQuadrature(BV Y,BV V,PetscScalar *Mu,PetscInt M,PetscInt L,PetscInt L_max,PetscScalar *w,PetscScalar *zn,PetscSubcomm subcomm,PetscInt npoints,PetscBool useconj)
     188             : {
     189           6 :   PetscMPIInt       sub_size,count;
     190           6 :   PetscInt          i,j,k,s;
     191           6 :   PetscScalar       *temp,*temp2,*ppk,alp;
     192           6 :   Mat               H;
     193           6 :   const PetscScalar *pH;
     194           6 :   MPI_Comm          child,parent;
     195             : 
     196           6 :   PetscFunctionBegin;
     197           6 :   PetscValidHeaderSpecific(Y,BV_CLASSID,1);
     198           6 :   PetscValidHeaderSpecific(V,BV_CLASSID,2);
     199             : 
     200           6 :   PetscCall(PetscSubcommGetChild(subcomm,&child));
     201           6 :   PetscCallMPI(MPI_Comm_size(child,&sub_size));
     202           6 :   PetscCall(PetscMalloc3(npoints*L*(L+1),&temp,2*M*L*L,&temp2,npoints,&ppk));
     203           6 :   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,L,L_max*npoints,NULL,&H));
     204           6 :   PetscCall(PetscArrayzero(temp2,2*M*L*L));
     205           6 :   PetscCall(BVSetActiveColumns(Y,0,L_max*npoints));
     206           6 :   PetscCall(BVSetActiveColumns(V,0,L));
     207           6 :   PetscCall(BVDot(Y,V,H));
     208           6 :   PetscCall(MatDenseGetArrayRead(H,&pH));
     209         190 :   for (i=0;i<npoints;i++) {
     210        1976 :     for (j=0;j<L;j++) {
     211       22592 :       for (k=0;k<L;k++) {
     212       20800 :         temp[k+j*L+i*L*L] = pH[k+j*L+i*L*L_max];
     213             :       }
     214             :     }
     215             :   }
     216           6 :   PetscCall(MatDenseRestoreArrayRead(H,&pH));
     217         190 :   for (i=0;i<npoints;i++) ppk[i] = 1;
     218          94 :   for (k=0;k<2*M;k++) {
     219        1008 :     for (j=0;j<L;j++) {
     220       28312 :       for (i=0;i<npoints;i++) {
     221       27392 :         alp = ppk[i]*w[p_id(i)];
     222      353792 :         for (s=0;s<L;s++) {
     223      326400 :           if (!useconj) temp2[s+(j+k*L)*L] += alp*temp[s+(j+i*L)*L];
     224           0 :           else temp2[s+(j+k*L)*L] += 2.0*PetscRealPart(alp*temp[s+(j+i*L)*L]);
     225             :         }
     226             :       }
     227             :     }
     228        2776 :     for (i=0;i<npoints;i++) ppk[i] *= zn[p_id(i)];
     229             :   }
     230       11230 :   for (i=0;i<2*M*L*L;i++) temp2[i] /= sub_size;
     231           6 :   PetscCall(PetscMPIIntCast(2*M*L*L,&count));
     232           6 :   PetscCall(PetscSubcommGetParent(subcomm,&parent));
     233          18 :   PetscCallMPI(MPIU_Allreduce(temp2,Mu,count,MPIU_SCALAR,MPIU_SUM,parent));
     234           6 :   PetscCall(PetscFree3(temp,temp2,ppk));
     235           6 :   PetscCall(MatDestroy(&H));
     236           6 :   PetscFunctionReturn(PETSC_SUCCESS);
     237             : }
     238             : 
     239             : /*@
     240             :    BVTraceQuadrature - Computes an estimate of the number of eigenvalues
     241             :    inside a region via quantities computed in the quadrature rule of
     242             :    contour integral methods.
     243             : 
     244             :    Collective
     245             : 
     246             :    Input Parameters:
     247             : +  Y       - first basis vectors
     248             : .  V       - second basis vectors
     249             : .  L       - block size
     250             : .  L_max   - maximum block size
     251             : .  w       - quadrature weights
     252             : .  scat    - (optional) VecScatter object to communicate between subcommunicators
     253             : .  subcomm - subcommunicator layout
     254             : .  npoints - number of points to process by the subcommunicator
     255             : -  useconj - whether conjugate points can be used or not
     256             : 
     257             :    Output Parameter:
     258             : .  est_eig - estimated eigenvalue count
     259             : 
     260             :    Notes:
     261             :    This function returns an estimation of the number of eigenvalues in the
     262             :    region, computed as trace(V'*S_0), where S_0 is the first panel of S
     263             :    computed by BVSumQuadrature().
     264             : 
     265             :    When using subcommunicators, Y is stored in the subcommunicators for a subset
     266             :    of integration points. In that case, the computation is done in the subcomm
     267             :    and then scattered to the whole communicator in S using the VecScatter scat.
     268             :    The value npoints is the number of points to be processed in this subcomm
     269             :    and the flag useconj indicates whether symmetric points can be reused.
     270             : 
     271             :    Level: developer
     272             : 
     273             : .seealso: BVScatter(), BVDotQuadrature(), BVSumQuadrature(), RGComputeQuadrature(), RGCanUseConjugates()
     274             : @*/
     275           0 : PetscErrorCode BVTraceQuadrature(BV Y,BV V,PetscInt L,PetscInt L_max,PetscScalar *w,VecScatter scat,PetscSubcomm subcomm,PetscInt npoints,PetscBool useconj,PetscReal *est_eig)
     276             : {
     277           0 :   PetscInt       i,j;
     278           0 :   Vec            y,yall,vj;
     279           0 :   PetscScalar    dot,sum=0.0,one=1.0;
     280             : 
     281           0 :   PetscFunctionBegin;
     282           0 :   PetscValidHeaderSpecific(Y,BV_CLASSID,1);
     283           0 :   PetscValidHeaderSpecific(V,BV_CLASSID,2);
     284           0 :   if (scat) PetscValidHeaderSpecific(scat,PETSCSF_CLASSID,6);
     285             : 
     286           0 :   PetscCall(BVCreateVec(Y,&y));
     287           0 :   PetscCall(BVCreateVec(V,&yall));
     288           0 :   for (j=0;j<L;j++) {
     289           0 :     PetscCall(VecSet(y,0.0));
     290           0 :     for (i=0;i<npoints;i++) {
     291           0 :       PetscCall(BVSetActiveColumns(Y,i*L_max+j,i*L_max+j+1));
     292           0 :       PetscCall(BVMultVec(Y,w[p_id(i)],1.0,y,&one));
     293             :     }
     294           0 :     PetscCall(BVGetColumn(V,j,&vj));
     295           0 :     if (scat) {
     296           0 :       PetscCall(VecScatterBegin(scat,y,yall,ADD_VALUES,SCATTER_REVERSE));
     297           0 :       PetscCall(VecScatterEnd(scat,y,yall,ADD_VALUES,SCATTER_REVERSE));
     298           0 :       PetscCall(VecDot(vj,yall,&dot));
     299           0 :     } else PetscCall(VecDot(vj,y,&dot));
     300           0 :     PetscCall(BVRestoreColumn(V,j,&vj));
     301           0 :     if (useconj) sum += 2.0*PetscRealPart(dot);
     302           0 :     else sum += dot;
     303             :   }
     304           0 :   *est_eig = PetscAbsScalar(sum)/(PetscReal)L;
     305           0 :   PetscCall(VecDestroy(&y));
     306           0 :   PetscCall(VecDestroy(&yall));
     307           0 :   PetscFunctionReturn(PETSC_SUCCESS);
     308             : }
     309             : 
     310          21 : static PetscErrorCode BVSVDAndRank_Refine(BV S,PetscReal delta,PetscScalar *pA,PetscReal *sigma,PetscInt *rank)
     311             : {
     312          21 :   PetscInt       i,j,k,ml=S->k;
     313          21 :   PetscMPIInt    len;
     314          21 :   PetscScalar    *work,*B,*tempB,*sarray,*Q1,*Q2,*temp2,alpha=1.0,beta=0.0;
     315          21 :   PetscBLASInt   l,m,n,lda,ldu,ldvt,lwork,info,ldb,ldc,lds;
     316             : #if defined(PETSC_USE_COMPLEX)
     317             :   PetscReal      *rwork;
     318             : #endif
     319             : 
     320          21 :   PetscFunctionBegin;
     321          21 :   PetscCall(PetscBLASIntCast(S->ld,&lds));
     322          21 :   PetscCall(BVGetArray(S,&sarray));
     323          21 :   PetscCall(PetscMalloc6(ml*ml,&temp2,S->n*ml,&Q1,S->n*ml,&Q2,ml*ml,&B,ml*ml,&tempB,5*ml,&work));
     324             : #if defined(PETSC_USE_COMPLEX)
     325             :   PetscCall(PetscMalloc1(5*ml,&rwork));
     326             : #endif
     327          21 :   PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
     328             : 
     329          21 :   PetscCall(PetscArrayzero(B,ml*ml));
     330        2505 :   for (i=0;i<ml;i++) B[i*ml+i]=1;
     331             : 
     332          63 :   for (k=0;k<2;k++) {
     333          42 :     PetscCall(PetscBLASIntCast(S->n,&m));
     334          42 :     PetscCall(PetscBLASIntCast(ml,&l));
     335          42 :     n = l; lda = m; ldb = m; ldc = l;
     336          42 :     if (!k) PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&l,&n,&m,&alpha,sarray,&lds,sarray,&lds,&beta,pA,&ldc));
     337          21 :     else PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&l,&n,&m,&alpha,Q1,&lda,Q1,&ldb,&beta,pA,&ldc));
     338          42 :     PetscCall(PetscArrayzero(temp2,ml*ml));
     339          42 :     PetscCall(PetscMPIIntCast(ml*ml,&len));
     340         126 :     PetscCallMPI(MPIU_Allreduce(pA,temp2,len,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)S)));
     341             : 
     342          42 :     PetscCall(PetscBLASIntCast(ml,&m));
     343          42 :     n = m; lda = m; lwork = 5*m, ldu = 1; ldvt = 1;
     344             : #if defined(PETSC_USE_COMPLEX)
     345             :     PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&m,&n,temp2,&lda,sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,rwork,&info));
     346             : #else
     347          42 :     PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&m,&n,temp2,&lda,sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,&info));
     348             : #endif
     349          42 :     SlepcCheckLapackInfo("gesvd",info);
     350             : 
     351          42 :     PetscCall(PetscBLASIntCast(S->n,&l));
     352          42 :     PetscCall(PetscBLASIntCast(ml,&n));
     353          42 :     m = n; lda = l; ldb = m; ldc = l;
     354          42 :     if (!k) PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&l,&n,&m,&alpha,sarray,&lds,temp2,&ldb,&beta,Q1,&ldc));
     355          21 :     else PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&l,&n,&m,&alpha,Q1,&lda,temp2,&ldb,&beta,Q2,&ldc));
     356             : 
     357          42 :     PetscCall(PetscBLASIntCast(ml,&l));
     358          42 :     m = l; n = l; lda = l; ldb = m; ldc = l;
     359          42 :     PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&l,&n,&m,&alpha,B,&lda,temp2,&ldb,&beta,tempB,&ldc));
     360        5010 :     for (i=0;i<ml;i++) {
     361        4968 :       sigma[i] = PetscSqrtReal(sigma[i]);
     362     1660712 :       for (j=0;j<S->n;j++) {
     363     1655744 :         if (k%2) Q2[j+i*S->n] /= sigma[i];
     364      827872 :         else Q1[j+i*S->n] /= sigma[i];
     365             :       }
     366      616584 :       for (j=0;j<ml;j++) B[j+i*ml] = tempB[j+i*ml]*sigma[i];
     367             :     }
     368             :   }
     369             : 
     370          21 :   PetscCall(PetscBLASIntCast(ml,&m));
     371          21 :   n = m; lda = m; ldu=1; ldvt=1;
     372             : #if defined(PETSC_USE_COMPLEX)
     373             :   PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&m,&n,B,&lda,sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,rwork,&info));
     374             : #else
     375          21 :   PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&m,&n,B,&lda,sigma,NULL,&ldu,NULL,&ldvt,work,&lwork,&info));
     376             : #endif
     377          21 :   SlepcCheckLapackInfo("gesvd",info);
     378             : 
     379          21 :   PetscCall(PetscBLASIntCast(S->n,&l));
     380          21 :   PetscCall(PetscBLASIntCast(ml,&n));
     381          21 :   m = n; lda = l; ldb = m;
     382          21 :   if (k%2) PetscCallBLAS("BLASgemm",BLASgemm_("N","T",&l,&n,&m,&alpha,Q1,&lda,B,&ldb,&beta,sarray,&lds));
     383          21 :   else PetscCallBLAS("BLASgemm",BLASgemm_("N","T",&l,&n,&m,&alpha,Q2,&lda,B,&ldb,&beta,sarray,&lds));
     384             : 
     385          21 :   PetscCall(PetscFPTrapPop());
     386          21 :   PetscCall(BVRestoreArray(S,&sarray));
     387             : 
     388          21 :   if (rank) {
     389          21 :     (*rank) = 0;
     390        2505 :     for (i=0;i<ml;i++) {
     391        4840 :       if (sigma[i]/PetscMax(sigma[0],1.0)>delta) (*rank)++;
     392             :     }
     393             :   }
     394          21 :   PetscCall(PetscFree6(temp2,Q1,Q2,B,tempB,work));
     395             : #if defined(PETSC_USE_COMPLEX)
     396             :   PetscCall(PetscFree(rwork));
     397             : #endif
     398          21 :   PetscFunctionReturn(PETSC_SUCCESS);
     399             : }
     400             : 
     401           0 : static PetscErrorCode BVSVDAndRank_QR(BV S,PetscReal delta,PetscScalar *pA,PetscReal *sigma,PetscInt *rank)
     402             : {
     403           0 :   PetscInt       i,n,ml=S->k;
     404           0 :   PetscBLASInt   m,lda,lwork,info;
     405           0 :   PetscScalar    *work;
     406           0 :   PetscReal      *rwork;
     407           0 :   Mat            A;
     408           0 :   Vec            v;
     409             : 
     410           0 :   PetscFunctionBegin;
     411             :   /* Compute QR factorizaton of S */
     412           0 :   PetscCall(BVGetSizes(S,NULL,&n,NULL));
     413           0 :   n = PetscMin(n,ml);
     414           0 :   PetscCall(BVSetActiveColumns(S,0,n));
     415           0 :   PetscCall(PetscArrayzero(pA,ml*n));
     416           0 :   PetscCall(MatCreateDense(PETSC_COMM_SELF,n,n,PETSC_DECIDE,PETSC_DECIDE,pA,&A));
     417           0 :   PetscCall(BVOrthogonalize(S,A));
     418           0 :   if (n<ml) {
     419             :     /* the rest of the factorization */
     420           0 :     for (i=n;i<ml;i++) {
     421           0 :       PetscCall(BVGetColumn(S,i,&v));
     422           0 :       PetscCall(BVOrthogonalizeVec(S,v,pA+i*n,NULL,NULL));
     423           0 :       PetscCall(BVRestoreColumn(S,i,&v));
     424             :     }
     425             :   }
     426           0 :   PetscCall(PetscBLASIntCast(n,&lda));
     427           0 :   PetscCall(PetscBLASIntCast(ml,&m));
     428           0 :   PetscCall(PetscMalloc2(5*ml,&work,5*ml,&rwork));
     429           0 :   lwork = 5*m;
     430           0 :   PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
     431             : #if !defined (PETSC_USE_COMPLEX)
     432           0 :   PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&lda,&m,pA,&lda,sigma,NULL,&lda,NULL,&lda,work,&lwork,&info));
     433             : #else
     434             :   PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&lda,&m,pA,&lda,sigma,NULL,&lda,NULL,&lda,work,&lwork,rwork,&info));
     435             : #endif
     436           0 :   SlepcCheckLapackInfo("gesvd",info);
     437           0 :   PetscCall(PetscFPTrapPop());
     438           0 :   *rank = 0;
     439           0 :   for (i=0;i<n;i++) {
     440           0 :     if (sigma[i]/PetscMax(sigma[0],1)>delta) (*rank)++;
     441             :   }
     442             :   /* n first columns of A have the left singular vectors */
     443           0 :   PetscCall(BVMultInPlace(S,A,0,*rank));
     444           0 :   PetscCall(PetscFree2(work,rwork));
     445           0 :   PetscCall(MatDestroy(&A));
     446           0 :   PetscFunctionReturn(PETSC_SUCCESS);
     447             : }
     448             : 
     449           0 : static PetscErrorCode BVSVDAndRank_QR_CAA(BV S,PetscInt M,PetscInt L,PetscReal delta,PetscScalar *pA,PetscReal *sigma,PetscInt *rank)
     450             : {
     451           0 :   PetscInt       i,j,n,ml=S->k;
     452           0 :   PetscBLASInt   m,k_,lda,lwork,info;
     453           0 :   PetscScalar    *work,*T,*U,*R,sone=1.0,zero=0.0;
     454           0 :   PetscReal      *rwork;
     455           0 :   Mat            A;
     456             : 
     457           0 :   PetscFunctionBegin;
     458             :   /* Compute QR factorizaton of S */
     459           0 :   PetscCall(BVGetSizes(S,NULL,&n,NULL));
     460           0 :   PetscCheck(n>=ml,PetscObjectComm((PetscObject)S),PETSC_ERR_SUP,"The QR_CAA method does not support problem size n < m*L");
     461           0 :   PetscCall(BVSetActiveColumns(S,0,ml));
     462           0 :   PetscCall(PetscArrayzero(pA,ml*ml));
     463           0 :   PetscCall(MatCreateDense(PETSC_COMM_SELF,ml,ml,PETSC_DECIDE,PETSC_DECIDE,pA,&A));
     464           0 :   PetscCall(BVOrthogonalize(S,A));
     465           0 :   PetscCall(MatDestroy(&A));
     466             : 
     467             :   /* SVD of first (M-1)*L diagonal block */
     468           0 :   PetscCall(PetscBLASIntCast((M-1)*L,&m));
     469           0 :   PetscCall(PetscMalloc5(m*m,&T,m*m,&R,m*m,&U,5*ml,&work,5*ml,&rwork));
     470           0 :   for (j=0;j<m;j++) PetscCall(PetscArraycpy(R+j*m,pA+j*ml,m));
     471           0 :   lwork = 5*m;
     472           0 :   PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
     473             : #if !defined (PETSC_USE_COMPLEX)
     474           0 :   PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","O",&m,&m,R,&m,sigma,U,&m,NULL,&m,work,&lwork,&info));
     475             : #else
     476             :   PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","O",&m,&m,R,&m,sigma,U,&m,NULL,&m,work,&lwork,rwork,&info));
     477             : #endif
     478           0 :   SlepcCheckLapackInfo("gesvd",info);
     479           0 :   PetscCall(PetscFPTrapPop());
     480           0 :   *rank = 0;
     481           0 :   for (i=0;i<m;i++) {
     482           0 :     if (sigma[i]/PetscMax(sigma[0],1)>delta) (*rank)++;
     483             :   }
     484           0 :   PetscCall(MatCreateDense(PETSC_COMM_SELF,m,m,PETSC_DECIDE,PETSC_DECIDE,U,&A));
     485           0 :   PetscCall(BVSetActiveColumns(S,0,m));
     486           0 :   PetscCall(BVMultInPlace(S,A,0,*rank));
     487           0 :   PetscCall(MatDestroy(&A));
     488             :   /* Projected linear system */
     489             :   /* m first columns of A have the right singular vectors */
     490           0 :   PetscCall(PetscBLASIntCast(*rank,&k_));
     491           0 :   PetscCall(PetscBLASIntCast(ml,&lda));
     492           0 :   PetscCallBLAS("BLASgemm",BLASgemm_("N","C",&m,&k_,&m,&sone,pA+L*lda,&lda,R,&m,&zero,T,&m));
     493           0 :   PetscCall(PetscArrayzero(pA,ml*ml));
     494           0 :   PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&m,&sone,U,&m,T,&m,&zero,pA,&k_));
     495           0 :   for (j=0;j<k_;j++) for (i=0;i<k_;i++) pA[j*k_+i] /= sigma[j];
     496           0 :   PetscCall(PetscFree5(T,R,U,work,rwork));
     497           0 :   PetscFunctionReturn(PETSC_SUCCESS);
     498             : }
     499             : 
     500             : /*@
     501             :    BVSVDAndRank - Compute the SVD (left singular vectors only, and singular
     502             :    values) and determine the numerical rank according to a tolerance.
     503             : 
     504             :    Collective
     505             : 
     506             :    Input Parameters:
     507             : +  S     - the basis vectors
     508             : .  m     - the moment degree
     509             : .  l     - the block size
     510             : .  delta - the tolerance used to determine the rank
     511             : -  meth  - the method to be used
     512             : 
     513             :    Output Parameters:
     514             : +  A     - workspace, on output contains relevant values in the CAA method
     515             : .  sigma - computed singular values
     516             : -  rank  - estimated rank (optional)
     517             : 
     518             :    Notes:
     519             :    This function computes [U,Sigma,V] = svd(S) and replaces S with U.
     520             :    The current implementation computes this via S'*S, and it may include
     521             :    some kind of iterative refinement to improve accuracy in some cases.
     522             : 
     523             :    The parameters m and l refer to the moment and block size of contour
     524             :    integral methods. All columns up to m*l are modified, and the active
     525             :    columns are set to 0..m*l.
     526             : 
     527             :    The method is one of BV_SVD_METHOD_REFINE, BV_SVD_METHOD_QR, BV_SVD_METHOD_QR_CAA.
     528             : 
     529             :    The A workspace should be m*l*m*l in size.
     530             : 
     531             :    Once the decomposition is computed, the numerical rank is estimated
     532             :    by counting the number of singular values that are larger than the
     533             :    tolerance delta, relative to the first singular value.
     534             : 
     535             :    Level: developer
     536             : 
     537             : .seealso: BVSetActiveColumns()
     538             : @*/
     539          21 : PetscErrorCode BVSVDAndRank(BV S,PetscInt m,PetscInt l,PetscReal delta,BVSVDMethod meth,PetscScalar *A,PetscReal *sigma,PetscInt *rank)
     540             : {
     541          21 :   PetscFunctionBegin;
     542          21 :   PetscValidHeaderSpecific(S,BV_CLASSID,1);
     543          63 :   PetscValidLogicalCollectiveInt(S,m,2);
     544          63 :   PetscValidLogicalCollectiveInt(S,l,3);
     545          63 :   PetscValidLogicalCollectiveReal(S,delta,4);
     546          63 :   PetscValidLogicalCollectiveEnum(S,meth,5);
     547          21 :   PetscAssertPointer(A,6);
     548          21 :   PetscAssertPointer(sigma,7);
     549          21 :   PetscAssertPointer(rank,8);
     550             : 
     551          21 :   PetscCall(PetscLogEventBegin(BV_SVDAndRank,S,0,0,0));
     552          21 :   PetscCall(BVSetActiveColumns(S,0,m*l));
     553          21 :   switch (meth) {
     554          21 :     case BV_SVD_METHOD_REFINE:
     555          21 :       PetscCall(BVSVDAndRank_Refine(S,delta,A,sigma,rank));
     556             :       break;
     557           0 :     case BV_SVD_METHOD_QR:
     558           0 :       PetscCall(BVSVDAndRank_QR(S,delta,A,sigma,rank));
     559             :       break;
     560           0 :     case BV_SVD_METHOD_QR_CAA:
     561           0 :       PetscCall(BVSVDAndRank_QR_CAA(S,m,l,delta,A,sigma,rank));
     562             :       break;
     563             :   }
     564          21 :   PetscCall(PetscLogEventEnd(BV_SVDAndRank,S,0,0,0));
     565          21 :   PetscFunctionReturn(PETSC_SUCCESS);
     566             : }
     567             : 
     568             : /*@
     569             :    BVCISSResizeBases - Resize the bases involved in CISS solvers when the L grows.
     570             : 
     571             :    Logically Collective
     572             : 
     573             :    Input Parameters:
     574             : +  S      - basis of L*M columns
     575             : .  V      - basis of L columns (may be associated to subcommunicators)
     576             : .  Y      - basis of npoints*L columns
     577             : .  Lold   - old value of L
     578             : .  Lnew   - new value of L
     579             : .  M      - the moment size
     580             : -  npoints - number of integration points
     581             : 
     582             :    Level: developer
     583             : 
     584             : .seealso: BVResize()
     585             : @*/
     586           0 : PetscErrorCode BVCISSResizeBases(BV S,BV V,BV Y,PetscInt Lold,PetscInt Lnew,PetscInt M,PetscInt npoints)
     587             : {
     588           0 :   PetscInt       i,j;
     589             : 
     590           0 :   PetscFunctionBegin;
     591           0 :   PetscValidHeaderSpecific(S,BV_CLASSID,1);
     592           0 :   PetscValidHeaderSpecific(V,BV_CLASSID,2);
     593           0 :   PetscValidHeaderSpecific(Y,BV_CLASSID,3);
     594           0 :   PetscValidLogicalCollectiveInt(S,Lold,4);
     595           0 :   PetscValidLogicalCollectiveInt(S,Lnew,5);
     596           0 :   PetscValidLogicalCollectiveInt(S,M,6);
     597           0 :   PetscValidLogicalCollectiveInt(S,npoints,7);
     598             : 
     599           0 :   PetscCall(BVResize(S,Lnew*M,PETSC_FALSE));
     600           0 :   PetscCall(BVResize(V,Lnew,PETSC_TRUE));
     601           0 :   PetscCall(BVResize(Y,Lnew*npoints,PETSC_TRUE));
     602             :   /* columns of Y are interleaved */
     603           0 :   for (i=npoints-1;i>=0;i--) {
     604           0 :     for (j=Lold-1;j>=0;j--) PetscCall(BVCopyColumn(Y,i*Lold+j,i*Lnew+j));
     605             :   }
     606           0 :   PetscFunctionReturn(PETSC_SUCCESS);
     607             : }

Generated by: LCOV version 1.14