LCOV - code coverage report
Current view: top level - nep/impls - nepdefl.c (source / functions) Hit Total Coverage
Test: SLEPc Lines: 686 721 95.1 %
Date: 2024-04-25 00:48:42 Functions: 23 24 95.8 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
       3             :    SLEPc - Scalable Library for Eigenvalue Problem Computations
       4             :    Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
       5             : 
       6             :    This file is part of SLEPc.
       7             :    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
       8             :    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
       9             : */
      10             : 
      11             : #include <slepc/private/nepimpl.h>         /*I "slepcnep.h" I*/
      12             : #include <slepcblaslapack.h>
      13             : #include "nepdefl.h"
      14             : 
      15          65 : PetscErrorCode NEPDeflationGetInvariantPair(NEP_EXT_OP extop,BV *X,Mat *H)
      16             : {
      17          65 :   PetscFunctionBegin;
      18          65 :   if (X) *X = extop->X;
      19          65 :   if (H) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,extop->szd+1,extop->szd+1,extop->H,H));
      20          65 :   PetscFunctionReturn(PETSC_SUCCESS);
      21             : }
      22             : 
      23         118 : static PetscErrorCode NEPDeflationExtendInvariantPair(NEP_EXT_OP extop,Vec u,PetscScalar lambda,PetscInt k)
      24             : {
      25         118 :   Vec            uu;
      26         118 :   PetscInt       ld,i;
      27         118 :   PetscMPIInt    np;
      28         118 :   PetscReal      norm;
      29             : 
      30         118 :   PetscFunctionBegin;
      31         118 :   PetscCall(BVGetColumn(extop->X,k,&uu));
      32         118 :   ld = extop->szd+1;
      33         118 :   PetscCall(NEPDeflationCopyToExtendedVec(extop,uu,extop->H+k*ld,u,PETSC_TRUE));
      34         118 :   PetscCall(BVRestoreColumn(extop->X,k,&uu));
      35         118 :   PetscCall(BVNormColumn(extop->X,k,NORM_2,&norm));
      36         118 :   PetscCall(BVScaleColumn(extop->X,k,1.0/norm));
      37         118 :   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)u),&np));
      38         197 :   for (i=0;i<k;i++) extop->H[k*ld+i] *= PetscSqrtReal(np)/norm;
      39         118 :   extop->H[k*(ld+1)] = lambda;
      40         118 :   PetscFunctionReturn(PETSC_SUCCESS);
      41             : }
      42             : 
      43          20 : PetscErrorCode NEPDeflationExtractEigenpair(NEP_EXT_OP extop,PetscInt k,Vec u,PetscScalar lambda,DS ds)
      44             : {
      45          20 :   Mat            A,H;
      46          20 :   PetscInt       ldh=extop->szd+1,ldds,k1=k+1;
      47          20 :   PetscScalar    *eigr,*eigi,*t,*Z;
      48          20 :   Vec            x;
      49             : 
      50          20 :   PetscFunctionBegin;
      51          20 :   PetscCall(NEPDeflationExtendInvariantPair(extop,u,lambda,k));
      52          20 :   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,k1,k1,extop->H,&H));
      53          20 :   PetscCall(MatDenseSetLDA(H,ldh));
      54          20 :   PetscCall(PetscCalloc3(k1,&eigr,k1,&eigi,extop->szd,&t));
      55          20 :   PetscCall(DSReset(ds));
      56          20 :   PetscCall(DSSetType(ds,DSNHEP));
      57          20 :   PetscCall(DSAllocate(ds,ldh));
      58          20 :   PetscCall(DSGetLeadingDimension(ds,&ldds));
      59          20 :   PetscCall(DSSetDimensions(ds,k1,0,k1));
      60          20 :   PetscCall(DSGetMat(ds,DS_MAT_A,&A));
      61          20 :   PetscCall(MatCopy(H,A,SAME_NONZERO_PATTERN));
      62          20 :   PetscCall(DSRestoreMat(ds,DS_MAT_A,&A));
      63          20 :   PetscCall(MatDestroy(&H));
      64          20 :   PetscCall(DSSolve(ds,eigr,eigi));
      65          20 :   PetscCall(DSVectors(ds,DS_MAT_X,&k,NULL));
      66          20 :   PetscCall(DSGetArray(ds,DS_MAT_X,&Z));
      67          20 :   PetscCall(BVMultColumn(extop->X,1.0,Z[k*ldds+k],k,Z+k*ldds));
      68          20 :   PetscCall(DSRestoreArray(ds,DS_MAT_X,&Z));
      69          20 :   PetscCall(BVGetColumn(extop->X,k,&x));
      70          20 :   PetscCall(NEPDeflationCopyToExtendedVec(extop,x,t,u,PETSC_FALSE));
      71          20 :   PetscCall(BVRestoreColumn(extop->X,k,&x));
      72          20 :   PetscCall(PetscFree3(eigr,eigi,t));
      73          20 :   PetscFunctionReturn(PETSC_SUCCESS);
      74             : }
      75             : 
      76         271 : PetscErrorCode NEPDeflationCopyToExtendedVec(NEP_EXT_OP extop,Vec v,PetscScalar *a,Vec vex,PetscBool back)
      77             : {
      78         271 :   PetscMPIInt    np,rk,count;
      79         271 :   PetscScalar    *array1,*array2;
      80         271 :   PetscInt       nloc;
      81             : 
      82         271 :   PetscFunctionBegin;
      83         271 :   if (extop->szd) {
      84         177 :     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)vex),&rk));
      85         177 :     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)vex),&np));
      86         177 :     PetscCall(BVGetSizes(extop->nep->V,&nloc,NULL,NULL));
      87         177 :     if (v) {
      88         177 :       PetscCall(VecGetArray(v,&array1));
      89         177 :       PetscCall(VecGetArray(vex,&array2));
      90         177 :       if (back) PetscCall(PetscArraycpy(array1,array2,nloc));
      91          38 :       else PetscCall(PetscArraycpy(array2,array1,nloc));
      92         177 :       PetscCall(VecRestoreArray(v,&array1));
      93         177 :       PetscCall(VecRestoreArray(vex,&array2));
      94             :     }
      95         177 :     if (a) {
      96         159 :       PetscCall(VecGetArray(vex,&array2));
      97         159 :       if (back) {
      98         139 :         PetscCall(PetscArraycpy(a,array2+nloc,extop->szd));
      99         139 :         PetscCall(PetscMPIIntCast(extop->szd,&count));
     100         278 :         PetscCallMPI(MPI_Bcast(a,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)vex)));
     101             :       } else {
     102          20 :         PetscCall(PetscArraycpy(array2+nloc,a,extop->szd));
     103          20 :         PetscCall(PetscMPIIntCast(extop->szd,&count));
     104          40 :         PetscCallMPI(MPI_Bcast(array2+nloc,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)vex)));
     105             :       }
     106         159 :       PetscCall(VecRestoreArray(vex,&array2));
     107             :     }
     108             :   } else {
     109          94 :     if (back) PetscCall(VecCopy(vex,v));
     110          47 :     else PetscCall(VecCopy(v,vex));
     111             :   }
     112         271 :   PetscFunctionReturn(PETSC_SUCCESS);
     113             : }
     114             : 
     115          72 : PetscErrorCode NEPDeflationCreateVec(NEP_EXT_OP extop,Vec *v)
     116             : {
     117          72 :   PetscInt       nloc;
     118          72 :   Vec            u;
     119          72 :   VecType        type;
     120             : 
     121          72 :   PetscFunctionBegin;
     122          72 :   if (extop->szd) {
     123          24 :     PetscCall(BVGetColumn(extop->nep->V,0,&u));
     124          24 :     PetscCall(VecGetType(u,&type));
     125          24 :     PetscCall(BVRestoreColumn(extop->nep->V,0,&u));
     126          24 :     PetscCall(VecCreate(PetscObjectComm((PetscObject)extop->nep),v));
     127          24 :     PetscCall(VecSetType(*v,type));
     128          24 :     PetscCall(BVGetSizes(extop->nep->V,&nloc,NULL,NULL));
     129          24 :     nloc += extop->szd;
     130          24 :     PetscCall(VecSetSizes(*v,nloc,PETSC_DECIDE));
     131          48 :   } else PetscCall(BVCreateVec(extop->nep->V,v));
     132          72 :   PetscFunctionReturn(PETSC_SUCCESS);
     133             : }
     134             : 
     135          17 : PetscErrorCode NEPDeflationCreateBV(NEP_EXT_OP extop,PetscInt sz,BV *V)
     136             : {
     137          17 :   PetscInt           nloc;
     138          17 :   BVType             type;
     139          17 :   BVOrthogType       otype;
     140          17 :   BVOrthogRefineType oref;
     141          17 :   PetscReal          oeta;
     142          17 :   BVOrthogBlockType  oblock;
     143          17 :   NEP                nep=extop->nep;
     144          17 :   VecType            vtype;
     145             : 
     146          17 :   PetscFunctionBegin;
     147          17 :   if (extop->szd) {
     148           3 :     PetscCall(BVGetSizes(nep->V,&nloc,NULL,NULL));
     149           3 :     PetscCall(BVCreate(PetscObjectComm((PetscObject)nep),V));
     150           3 :     PetscCall(BVSetSizes(*V,nloc+extop->szd,PETSC_DECIDE,sz));
     151           3 :     PetscCall(BVGetType(nep->V,&type));
     152           3 :     PetscCall(BVSetType(*V,type));
     153           3 :     PetscCall(BVGetVecType(nep->V,&vtype));
     154           3 :     PetscCall(BVSetVecType(*V,vtype));
     155           3 :     PetscCall(BVGetOrthogonalization(nep->V,&otype,&oref,&oeta,&oblock));
     156           3 :     PetscCall(BVSetOrthogonalization(*V,otype,oref,oeta,oblock));
     157           3 :     PetscCall(PetscObjectStateIncrease((PetscObject)*V));
     158          14 :   } else PetscCall(BVDuplicateResize(nep->V,sz,V));
     159          17 :   PetscFunctionReturn(PETSC_SUCCESS);
     160             : }
     161             : 
     162          17 : PetscErrorCode NEPDeflationSetRandomVec(NEP_EXT_OP extop,Vec v)
     163             : {
     164          17 :   PetscInt       n,next,i;
     165          17 :   PetscRandom    rand;
     166          17 :   PetscScalar    *array;
     167          17 :   PetscMPIInt    nn,np;
     168             : 
     169          17 :   PetscFunctionBegin;
     170          17 :   PetscCall(BVGetRandomContext(extop->nep->V,&rand));
     171          17 :   PetscCall(VecSetRandom(v,rand));
     172          17 :   if (extop->szd) {
     173          17 :     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)v),&np));
     174          17 :     PetscCall(BVGetSizes(extop->nep->V,&n,NULL,NULL));
     175          17 :     PetscCall(VecGetLocalSize(v,&next));
     176          17 :     PetscCall(VecGetArray(v,&array));
     177          25 :     for (i=n+extop->n;i<next;i++) array[i] = 0.0;
     178          42 :     for (i=n;i<n+extop->n;i++) array[i] /= PetscSqrtReal(np);
     179          17 :     PetscCall(PetscMPIIntCast(extop->n,&nn));
     180          34 :     PetscCallMPI(MPI_Bcast(array+n,nn,MPIU_SCALAR,0,PetscObjectComm((PetscObject)v)));
     181          17 :     PetscCall(VecRestoreArray(v,&array));
     182             :   }
     183          17 :   PetscFunctionReturn(PETSC_SUCCESS);
     184             : }
     185             : 
     186          33 : static PetscErrorCode NEPDeflationEvaluateBasisMat(NEP_EXT_OP extop,PetscInt idx,PetscBool hat,PetscScalar *bval,PetscScalar *Hj,PetscScalar *Hjprev)
     187             : {
     188          33 :   PetscInt       i,k,n=extop->n,ldhj=extop->szd,ldh=extop->szd+1;
     189          33 :   PetscScalar    sone=1.0,zero=0.0;
     190          33 :   PetscBLASInt   ldh_,ldhj_,n_;
     191             : 
     192          33 :   PetscFunctionBegin;
     193          33 :   i = (idx<0)?extop->szd*extop->szd*(-idx):extop->szd*extop->szd;
     194          33 :   PetscCall(PetscArrayzero(Hj,i));
     195          33 :   PetscCall(PetscBLASIntCast(ldhj+1,&ldh_));
     196          33 :   PetscCall(PetscBLASIntCast(ldhj,&ldhj_));
     197          33 :   PetscCall(PetscBLASIntCast(n,&n_));
     198          33 :   if (idx<1) {
     199          82 :     if (!hat) for (i=0;i<extop->n;i++) Hj[i+i*ldhj] = 1.0;
     200           0 :     else for (i=0;i<extop->n;i++) Hj[i+i*ldhj] = 0.0;
     201             :   } else {
     202           0 :       for (i=0;i<n;i++) extop->H[i*ldh+i] -= extop->bc[idx-1];
     203           0 :       PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->H,&ldh_,Hjprev,&ldhj_,&zero,Hj,&ldhj_));
     204           0 :       for (i=0;i<n;i++) extop->H[i*ldh+i] += extop->bc[idx-1];
     205           0 :       if (hat) for (i=0;i<n;i++) Hj[i*(ldhj+1)] += bval[idx-1];
     206             :   }
     207          33 :   if (idx<0) {
     208          33 :     idx = -idx;
     209          33 :     for (k=1;k<idx;k++) {
     210           0 :       for (i=0;i<n;i++) extop->H[i*ldh+i] -= extop->bc[k-1];
     211           0 :       PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->H,&ldh_,Hj+(k-1)*ldhj*ldhj,&ldhj_,&zero,Hj+k*ldhj*ldhj,&ldhj_));
     212           0 :       for (i=0;i<n;i++) extop->H[i*ldh+i] += extop->bc[k-1];
     213           0 :       if (hat) for (i=0;i<n;i++) Hj[i*(ldhj+1)] += bval[k-1];
     214             :     }
     215             :   }
     216          33 :   PetscFunctionReturn(PETSC_SUCCESS);
     217             : }
     218             : 
     219          98 : PetscErrorCode NEPDeflationLocking(NEP_EXT_OP extop,Vec u,PetscScalar lambda)
     220             : {
     221          98 :   PetscInt       i;
     222             : 
     223          98 :   PetscFunctionBegin;
     224          98 :   PetscCall(NEPDeflationExtendInvariantPair(extop,u,lambda,extop->n));
     225          98 :   extop->n++;
     226          98 :   PetscCall(BVSetActiveColumns(extop->X,0,extop->n));
     227          98 :   if (extop->n <= extop->szd) {
     228             :     /* update XpX */
     229          33 :     PetscCall(BVDotColumn(extop->X,extop->n-1,extop->XpX+(extop->n-1)*extop->szd));
     230          33 :     extop->XpX[(extop->n-1)*(1+extop->szd)] = 1.0;
     231          49 :     for (i=0;i<extop->n-1;i++) extop->XpX[i*extop->szd+extop->n-1] = PetscConj(extop->XpX[(extop->n-1)*extop->szd+i]);
     232             :     /* determine minimality index */
     233          33 :     extop->midx = PetscMin(extop->max_midx,extop->n);
     234             :     /* polynominal basis coefficients */
     235          66 :     for (i=0;i<extop->midx;i++) extop->bc[i] = extop->nep->target;
     236             :     /* evaluate the polynomial basis in H */
     237          33 :     PetscCall(NEPDeflationEvaluateBasisMat(extop,-extop->midx,PETSC_FALSE,NULL,extop->Hj,NULL));
     238             :   }
     239          98 :   PetscFunctionReturn(PETSC_SUCCESS);
     240             : }
     241             : 
     242         769 : static PetscErrorCode NEPDeflationEvaluateHatFunction(NEP_EXT_OP extop, PetscInt idx,PetscScalar lambda,PetscScalar *y,PetscScalar *hfj,PetscScalar *hfjp,PetscInt ld)
     243             : {
     244         769 :   PetscInt          i,j,k,off,ini,fin,sz,ldh,n=extop->n;
     245         769 :   Mat               A,B;
     246         769 :   PetscScalar       *array;
     247         769 :   const PetscScalar *barray;
     248             : 
     249         769 :   PetscFunctionBegin;
     250         769 :   if (idx<0) {ini = 0; fin = extop->nep->nt;}
     251         480 :   else {ini = idx; fin = idx+1;}
     252         769 :   if (y) sz = hfjp?n+2:n+1;
     253         769 :   else sz = hfjp?3*n:2*n;
     254         769 :   ldh = extop->szd+1;
     255         769 :   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,sz,sz,NULL,&A));
     256         769 :   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,sz,sz,NULL,&B));
     257         769 :   PetscCall(MatDenseGetArray(A,&array));
     258        1903 :   for (j=0;j<n;j++)
     259        3004 :     for (i=0;i<n;i++) array[j*sz+i] = extop->H[j*ldh+i];
     260         769 :   PetscCall(MatDenseRestoreArrayWrite(A,&array));
     261         769 :   if (y) {
     262           0 :     PetscCall(MatDenseGetArray(A,&array));
     263           0 :     array[extop->n*(sz+1)] = lambda;
     264           0 :     if (hfjp) { array[(n+1)*sz+n] = 1.0; array[(n+1)*sz+n+1] = lambda;}
     265           0 :     for (i=0;i<n;i++) array[n*sz+i] = y[i];
     266           0 :     PetscCall(MatDenseRestoreArrayWrite(A,&array));
     267           0 :     for (j=ini;j<fin;j++) {
     268           0 :       PetscCall(FNEvaluateFunctionMat(extop->nep->f[j],A,B));
     269           0 :       PetscCall(MatDenseGetArrayRead(B,&barray));
     270           0 :       for (i=0;i<n;i++) hfj[j*ld+i] = barray[n*sz+i];
     271           0 :       if (hfjp) for (i=0;i<n;i++) hfjp[j*ld+i] = barray[(n+1)*sz+i];
     272           0 :       PetscCall(MatDenseRestoreArrayRead(B,&barray));
     273             :     }
     274             :   } else {
     275         769 :     off = idx<0?ld*n:0;
     276         769 :     PetscCall(MatDenseGetArray(A,&array));
     277        1903 :     for (k=0;k<n;k++) {
     278        1134 :       array[(n+k)*sz+k] = 1.0;
     279        1134 :       array[(n+k)*sz+n+k] = lambda;
     280             :     }
     281        1903 :     if (hfjp) for (k=0;k<n;k++) {
     282        1134 :       array[(2*n+k)*sz+n+k] = 1.0;
     283        1134 :       array[(2*n+k)*sz+2*n+k] = lambda;
     284             :     }
     285         769 :     PetscCall(MatDenseRestoreArray(A,&array));
     286        2113 :     for (j=ini;j<fin;j++) {
     287        1344 :       PetscCall(FNEvaluateFunctionMat(extop->nep->f[j],A,B));
     288        1344 :       PetscCall(MatDenseGetArrayRead(B,&barray));
     289        6678 :       for (i=0;i<n;i++) for (k=0;k<n;k++) hfj[j*off+i*ld+k] = barray[n*sz+i*sz+k];
     290        6678 :       if (hfjp) for (k=0;k<n;k++) for (i=0;i<n;i++) hfjp[j*off+i*ld+k] = barray[2*n*sz+i*sz+k];
     291        1344 :       PetscCall(MatDenseRestoreArrayRead(B,&barray));
     292             :     }
     293             :   }
     294         769 :   PetscCall(MatDestroy(&A));
     295         769 :   PetscCall(MatDestroy(&B));
     296         769 :   PetscFunctionReturn(PETSC_SUCCESS);
     297             : }
     298             : 
     299        3691 : static PetscErrorCode MatMult_NEPDeflation(Mat M,Vec x,Vec y)
     300             : {
     301        3691 :   NEP_DEF_MATSHELL  *matctx;
     302        3691 :   NEP_EXT_OP        extop;
     303        3691 :   Vec               x1,y1;
     304        3691 :   PetscScalar       *yy,sone=1.0,zero=0.0;
     305        3691 :   const PetscScalar *xx;
     306        3691 :   PetscInt          nloc,i;
     307        3691 :   PetscMPIInt       np;
     308        3691 :   PetscBLASInt      n_,one=1,szd_;
     309             : 
     310        3691 :   PetscFunctionBegin;
     311        3691 :   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)M),&np));
     312        3691 :   PetscCall(MatShellGetContext(M,&matctx));
     313        3691 :   extop = matctx->extop;
     314        3691 :   if (extop->ref) PetscCall(VecZeroEntries(y));
     315        3691 :   if (extop->szd) {
     316        2046 :     x1 = matctx->w[0]; y1 = matctx->w[1];
     317        2046 :     PetscCall(VecGetArrayRead(x,&xx));
     318        2046 :     PetscCall(VecPlaceArray(x1,xx));
     319        2046 :     PetscCall(VecGetArray(y,&yy));
     320        2046 :     PetscCall(VecPlaceArray(y1,yy));
     321        2046 :     PetscCall(MatMult(matctx->T,x1,y1));
     322        2046 :     if (!extop->ref && extop->n) {
     323        1001 :       PetscCall(VecGetLocalSize(x1,&nloc));
     324             :       /* copy for avoiding warning of constant array xx */
     325        2436 :       for (i=0;i<extop->n;i++) matctx->work[i] = xx[nloc+i]*PetscSqrtReal(np);
     326        1001 :       PetscCall(BVMultVec(matctx->U,1.0,1.0,y1,matctx->work));
     327        1001 :       PetscCall(BVDotVec(extop->X,x1,matctx->work));
     328        1001 :       PetscCall(PetscBLASIntCast(extop->n,&n_));
     329        1001 :       PetscCall(PetscBLASIntCast(extop->szd,&szd_));
     330        1001 :       PetscCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,matctx->A,&szd_,matctx->work,&one,&zero,yy+nloc,&one));
     331        1001 :       PetscCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,matctx->B,&szd_,xx+nloc,&one,&sone,yy+nloc,&one));
     332        2436 :       for (i=0;i<extop->n;i++) yy[nloc+i] /= PetscSqrtReal(np);
     333             :     }
     334        2046 :     PetscCall(VecResetArray(x1));
     335        2046 :     PetscCall(VecRestoreArrayRead(x,&xx));
     336        2046 :     PetscCall(VecResetArray(y1));
     337        2046 :     PetscCall(VecRestoreArray(y,&yy));
     338        1645 :   } else PetscCall(MatMult(matctx->T,x,y));
     339        3691 :   PetscFunctionReturn(PETSC_SUCCESS);
     340             : }
     341             : 
     342           0 : static PetscErrorCode MatCreateVecs_NEPDeflation(Mat M,Vec *right,Vec *left)
     343             : {
     344           0 :   NEP_DEF_MATSHELL *matctx;
     345             : 
     346           0 :   PetscFunctionBegin;
     347           0 :   PetscCall(MatShellGetContext(M,&matctx));
     348           0 :   if (right) PetscCall(VecDuplicate(matctx->w[0],right));
     349           0 :   if (left) PetscCall(VecDuplicate(matctx->w[0],left));
     350           0 :   PetscFunctionReturn(PETSC_SUCCESS);
     351             : }
     352             : 
     353         154 : static PetscErrorCode MatDestroy_NEPDeflation(Mat M)
     354             : {
     355         154 :   NEP_DEF_MATSHELL *matctx;
     356             : 
     357         154 :   PetscFunctionBegin;
     358         154 :   PetscCall(MatShellGetContext(M,&matctx));
     359         154 :   if (matctx->extop->szd) {
     360          42 :     PetscCall(BVDestroy(&matctx->U));
     361          42 :     PetscCall(PetscFree2(matctx->hfj,matctx->work));
     362          42 :     PetscCall(PetscFree2(matctx->A,matctx->B));
     363          42 :     PetscCall(VecDestroy(&matctx->w[0]));
     364          42 :     PetscCall(VecDestroy(&matctx->w[1]));
     365             :   }
     366         154 :   if (matctx->P != matctx->T) PetscCall(MatDestroy(&matctx->P));
     367         154 :   PetscCall(MatDestroy(&matctx->T));
     368         154 :   PetscCall(PetscFree(matctx));
     369         154 :   PetscFunctionReturn(PETSC_SUCCESS);
     370             : }
     371             : 
     372         897 : static PetscErrorCode NEPDeflationEvaluateBasis(NEP_EXT_OP extop,PetscScalar lambda,PetscInt n,PetscScalar *val,PetscBool jacobian)
     373             : {
     374         897 :   PetscScalar p;
     375         897 :   PetscInt    i;
     376             : 
     377         897 :   PetscFunctionBegin;
     378         897 :   if (!jacobian) {
     379         526 :     val[0] = 1.0;
     380         766 :     for (i=1;i<extop->n;i++) val[i] = val[i-1]*(lambda-extop->bc[i-1]);
     381             :   } else {
     382         371 :     val[0] = 0.0;
     383         371 :     p = 1.0;
     384         536 :     for (i=1;i<extop->n;i++) {
     385         165 :       val[i] = val[i-1]*(lambda-extop->bc[i-1])+p;
     386         165 :       p *= (lambda-extop->bc[i-1]);
     387             :     }
     388             :   }
     389         897 :   PetscFunctionReturn(PETSC_SUCCESS);
     390             : }
     391             : 
     392        2665 : static PetscErrorCode NEPDeflationComputeShellMat(NEP_EXT_OP extop,PetscScalar lambda,PetscBool jacobian,Mat *M)
     393             : {
     394        2665 :   NEP_DEF_MATSHELL *matctx,*matctxC;
     395        2665 :   PetscInt         nloc,mloc,n=extop->n,j,i,szd=extop->szd,ldh=szd+1,k;
     396        2665 :   Mat              F,Mshell,Mcomp;
     397        2665 :   PetscBool        ini=PETSC_FALSE;
     398        2665 :   PetscScalar      *hf,*hfj,*hfjp,sone=1.0,*hH,*hHprev,*pts,*B,*A,*Hj=extop->Hj,*basisv,zero=0.0;
     399        2665 :   PetscBLASInt     n_,info,szd_;
     400             : 
     401        2665 :   PetscFunctionBegin;
     402        2665 :   if (!M) Mshell = jacobian?extop->MJ:extop->MF;
     403         129 :   else Mshell = *M;
     404        2665 :   Mcomp = jacobian?extop->MF:extop->MJ;
     405        2665 :   if (!Mshell) {
     406         154 :     ini = PETSC_TRUE;
     407         154 :     PetscCall(PetscNew(&matctx));
     408         154 :     PetscCall(MatGetLocalSize(extop->nep->function,&mloc,&nloc));
     409         154 :     nloc += szd; mloc += szd;
     410         154 :     PetscCall(MatCreateShell(PetscObjectComm((PetscObject)extop->nep),nloc,mloc,PETSC_DETERMINE,PETSC_DETERMINE,matctx,&Mshell));
     411         154 :     PetscCall(MatShellSetOperation(Mshell,MATOP_MULT,(void(*)(void))MatMult_NEPDeflation));
     412         154 :     PetscCall(MatShellSetOperation(Mshell,MATOP_CREATE_VECS,(void(*)(void))MatCreateVecs_NEPDeflation));
     413         154 :     PetscCall(MatShellSetOperation(Mshell,MATOP_DESTROY,(void(*)(void))MatDestroy_NEPDeflation));
     414         154 :     matctx->nep = extop->nep;
     415         154 :     matctx->extop = extop;
     416         154 :     if (!M) {
     417         113 :       if (jacobian) { matctx->jacob = PETSC_TRUE; matctx->T = extop->nep->jacobian; extop->MJ = Mshell; }
     418          65 :       else { matctx->jacob = PETSC_FALSE; matctx->T = extop->nep->function; extop->MF = Mshell; }
     419         113 :       PetscCall(PetscObjectReference((PetscObject)matctx->T));
     420         113 :       if (!jacobian) {
     421          65 :         if (extop->nep->function_pre && extop->nep->function_pre != extop->nep->function) {
     422           2 :           matctx->P = extop->nep->function_pre;
     423           2 :           PetscCall(PetscObjectReference((PetscObject)matctx->P));
     424          63 :         } else matctx->P = matctx->T;
     425             :       }
     426             :     } else {
     427          41 :       matctx->jacob = jacobian;
     428          41 :       PetscCall(MatDuplicate(jacobian?extop->nep->jacobian:extop->nep->function,MAT_DO_NOT_COPY_VALUES,&matctx->T));
     429          41 :       *M = Mshell;
     430          41 :       if (!jacobian) {
     431          41 :         if (extop->nep->function_pre && extop->nep->function_pre != extop->nep->function) PetscCall(MatDuplicate(extop->nep->function_pre,MAT_DO_NOT_COPY_VALUES,&matctx->P));
     432          41 :         else matctx->P = matctx->T;
     433             :       }
     434             :     }
     435         154 :     if (szd) {
     436          42 :       PetscCall(BVCreateVec(extop->nep->V,matctx->w));
     437          42 :       PetscCall(VecDuplicate(matctx->w[0],matctx->w+1));
     438          42 :       PetscCall(BVDuplicateResize(extop->nep->V,szd,&matctx->U));
     439          42 :       PetscCall(PetscMalloc2(extop->simpU?2*(szd)*(szd):2*(szd)*(szd)*extop->nep->nt,&matctx->hfj,szd,&matctx->work));
     440          42 :       PetscCall(PetscMalloc2(szd*szd,&matctx->A,szd*szd,&matctx->B));
     441             :     }
     442        2511 :   } else PetscCall(MatShellGetContext(Mshell,&matctx));
     443        2511 :   if (ini || matctx->theta != lambda || matctx->n != extop->n) {
     444        2201 :     if (ini || matctx->theta != lambda) {
     445        2193 :       if (jacobian) PetscCall(NEPComputeJacobian(extop->nep,lambda,matctx->T));
     446        1286 :       else PetscCall(NEPComputeFunction(extop->nep,lambda,matctx->T,matctx->P));
     447             :     }
     448        2201 :     if (n) {
     449         657 :       matctx->hfjset = PETSC_FALSE;
     450         657 :       if (!extop->simpU) {
     451             :         /* likely hfjp has been already computed */
     452         500 :         if (Mcomp) {
     453         461 :           PetscCall(MatShellGetContext(Mcomp,&matctxC));
     454         461 :           if (matctxC->hfjset && matctxC->theta == lambda && matctxC->n == extop->n) {
     455         211 :             PetscCall(PetscArraycpy(matctx->hfj,matctxC->hfj,2*extop->szd*extop->szd*extop->nep->nt));
     456         211 :             matctx->hfjset = PETSC_TRUE;
     457             :           }
     458             :         }
     459         500 :         hfj = matctx->hfj; hfjp = matctx->hfj+extop->szd*extop->szd*extop->nep->nt;
     460         500 :         if (!matctx->hfjset) {
     461         289 :           PetscCall(NEPDeflationEvaluateHatFunction(extop,-1,lambda,NULL,hfj,hfjp,n));
     462         289 :           matctx->hfjset = PETSC_TRUE;
     463             :         }
     464         500 :         PetscCall(BVSetActiveColumns(matctx->U,0,n));
     465         500 :         hf = jacobian?hfjp:hfj;
     466         500 :         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n,n,hf,&F));
     467         500 :         PetscCall(BVMatMult(extop->X,extop->nep->A[0],matctx->U));
     468         500 :         PetscCall(BVMultInPlace(matctx->U,F,0,n));
     469         500 :         PetscCall(BVSetActiveColumns(extop->W,0,extop->n));
     470        1495 :         for (j=1;j<extop->nep->nt;j++) {
     471         995 :           PetscCall(BVMatMult(extop->X,extop->nep->A[j],extop->W));
     472         995 :           PetscCall(MatDensePlaceArray(F,hf+j*n*n));
     473         995 :           PetscCall(BVMult(matctx->U,1.0,1.0,extop->W,F));
     474         995 :           PetscCall(MatDenseResetArray(F));
     475             :         }
     476         500 :         PetscCall(MatDestroy(&F));
     477             :       } else {
     478         157 :         hfj = matctx->hfj;
     479         157 :         PetscCall(BVSetActiveColumns(matctx->U,0,n));
     480         157 :         PetscCall(BVMatMult(extop->X,matctx->T,matctx->U));
     481         360 :         for (j=0;j<n;j++) {
     482         498 :           for (i=0;i<n;i++) hfj[j*n+i] = -extop->H[j*ldh+i];
     483         203 :           hfj[j*(n+1)] += lambda;
     484             :         }
     485         157 :         PetscCall(PetscBLASIntCast(n,&n_));
     486         157 :         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
     487         157 :         PetscCallBLAS("LAPACKtrtri",LAPACKtrtri_("U","N",&n_,hfj,&n_,&info));
     488         157 :         PetscCall(PetscFPTrapPop());
     489         157 :         SlepcCheckLapackInfo("trtri",info);
     490         157 :         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n,n,hfj,&F));
     491         157 :         PetscCall(BVMultInPlace(matctx->U,F,0,n));
     492         157 :         if (jacobian) {
     493          62 :           PetscCall(NEPDeflationComputeFunction(extop,lambda,NULL));
     494          62 :           PetscCall(MatShellGetContext(extop->MF,&matctxC));
     495          62 :           PetscCall(BVMult(matctx->U,-1.0,1.0,matctxC->U,F));
     496             :         }
     497         157 :         PetscCall(MatDestroy(&F));
     498             :       }
     499         657 :       PetscCall(PetscCalloc3(n,&basisv,szd*szd,&hH,szd*szd,&hHprev));
     500         657 :       PetscCall(NEPDeflationEvaluateBasis(extop,lambda,n,basisv,jacobian));
     501         657 :       A = matctx->A;
     502         657 :       PetscCall(PetscArrayzero(A,szd*szd));
     503        1217 :       if (!jacobian) for (i=0;i<n;i++) A[i*(szd+1)] = 1.0;
     504        1611 :       for (j=0;j<n;j++)
     505        2512 :         for (i=0;i<n;i++)
     506        1558 :           for (k=1;k<extop->midx;k++) A[j*szd+i] += basisv[k]*PetscConj(Hj[k*szd*szd+i*szd+j]);
     507         657 :       PetscCall(PetscBLASIntCast(n,&n_));
     508         657 :       PetscCall(PetscBLASIntCast(szd,&szd_));
     509         657 :       B = matctx->B;
     510         657 :       PetscCall(PetscArrayzero(B,szd*szd));
     511         657 :       for (i=1;i<extop->midx;i++) {
     512           0 :         PetscCall(NEPDeflationEvaluateBasisMat(extop,i,PETSC_TRUE,basisv,hH,hHprev));
     513           0 :         PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->XpX,&szd_,hH,&szd_,&zero,hHprev,&szd_));
     514           0 :         PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&n_,&n_,&n_,&sone,extop->Hj+szd*szd*i,&szd_,hHprev,&szd_,&sone,B,&szd_));
     515           0 :         pts = hHprev; hHprev = hH; hH = pts;
     516             :       }
     517         657 :       PetscCall(PetscFree3(basisv,hH,hHprev));
     518             :     }
     519        2201 :     matctx->theta = lambda;
     520        2201 :     matctx->n = extop->n;
     521             :   }
     522        2665 :   PetscFunctionReturn(PETSC_SUCCESS);
     523             : }
     524             : 
     525        1549 : PetscErrorCode NEPDeflationComputeFunction(NEP_EXT_OP extop,PetscScalar lambda,Mat *F)
     526             : {
     527        1549 :   PetscFunctionBegin;
     528        1549 :   PetscCall(NEPDeflationComputeShellMat(extop,lambda,PETSC_FALSE,NULL));
     529        1549 :   if (F) *F = extop->MF;
     530        1549 :   PetscFunctionReturn(PETSC_SUCCESS);
     531             : }
     532             : 
     533         907 : PetscErrorCode NEPDeflationComputeJacobian(NEP_EXT_OP extop,PetscScalar lambda,Mat *J)
     534             : {
     535         907 :   PetscFunctionBegin;
     536         907 :   PetscCall(NEPDeflationComputeShellMat(extop,lambda,PETSC_TRUE,NULL));
     537         907 :   if (J) *J = extop->MJ;
     538         907 :   PetscFunctionReturn(PETSC_SUCCESS);
     539             : }
     540             : 
     541         209 : PetscErrorCode NEPDeflationSolveSetUp(NEP_EXT_OP extop,PetscScalar lambda)
     542             : {
     543         209 :   NEP_DEF_MATSHELL  *matctx;
     544         209 :   NEP_DEF_FUN_SOLVE solve;
     545         209 :   PetscInt          i,j,n=extop->n;
     546         209 :   Vec               u,tu;
     547         209 :   Mat               F;
     548         209 :   PetscScalar       snone=-1.0,sone=1.0;
     549         209 :   PetscBLASInt      n_,szd_,ldh_,*p,info;
     550         209 :   Mat               Mshell;
     551             : 
     552         209 :   PetscFunctionBegin;
     553         209 :   solve = extop->solve;
     554         209 :   if (lambda!=solve->theta || n!=solve->n) {
     555         209 :     PetscCall(NEPDeflationComputeShellMat(extop,lambda,PETSC_FALSE,solve->sincf?NULL:&solve->T));
     556         209 :     Mshell = (solve->sincf)?extop->MF:solve->T;
     557         209 :     PetscCall(MatShellGetContext(Mshell,&matctx));
     558         209 :     PetscCall(NEP_KSPSetOperators(solve->ksp,matctx->T,matctx->P));
     559         209 :     if (!extop->ref && n) {
     560          64 :       PetscCall(PetscBLASIntCast(n,&n_));
     561          64 :       PetscCall(PetscBLASIntCast(extop->szd,&szd_));
     562          64 :       PetscCall(PetscBLASIntCast(extop->szd+1,&ldh_));
     563          64 :       if (!extop->simpU) {
     564          50 :         PetscCall(BVSetActiveColumns(solve->T_1U,0,n));
     565         128 :         for (i=0;i<n;i++) {
     566          78 :           PetscCall(BVGetColumn(matctx->U,i,&u));
     567          78 :           PetscCall(BVGetColumn(solve->T_1U,i,&tu));
     568          78 :           PetscCall(KSPSolve(solve->ksp,u,tu));
     569          78 :           PetscCall(BVRestoreColumn(solve->T_1U,i,&tu));
     570          78 :           PetscCall(BVRestoreColumn(matctx->U,i,&u));
     571             :         }
     572          50 :         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n,n,solve->work,&F));
     573          50 :         PetscCall(BVDot(solve->T_1U,extop->X,F));
     574          50 :         PetscCall(MatDestroy(&F));
     575             :       } else {
     576          34 :         for (j=0;j<n;j++)
     577          52 :           for (i=0;i<n;i++) solve->work[j*n+i] = extop->XpX[j*extop->szd+i];
     578          34 :         for (i=0;i<n;i++) extop->H[i*ldh_+i] -= lambda;
     579          14 :         PetscCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&n_,&n_,&snone,extop->H,&ldh_,solve->work,&n_));
     580          34 :         for (i=0;i<n;i++) extop->H[i*ldh_+i] += lambda;
     581             :       }
     582          64 :       PetscCall(PetscArraycpy(solve->M,matctx->B,extop->szd*extop->szd));
     583          64 :       PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&snone,matctx->A,&szd_,solve->work,&n_,&sone,solve->M,&szd_));
     584          64 :       PetscCall(PetscMalloc1(n,&p));
     585          64 :       PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
     586          64 :       PetscCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n_,&n_,solve->M,&szd_,p,&info));
     587          64 :       SlepcCheckLapackInfo("getrf",info);
     588          64 :       PetscCallBLAS("LAPACKgetri",LAPACKgetri_(&n_,solve->M,&szd_,p,solve->work,&n_,&info));
     589          64 :       SlepcCheckLapackInfo("getri",info);
     590          64 :       PetscCall(PetscFPTrapPop());
     591          64 :       PetscCall(PetscFree(p));
     592             :     }
     593         209 :     solve->theta = lambda;
     594         209 :     solve->n = n;
     595             :   }
     596         209 :   PetscFunctionReturn(PETSC_SUCCESS);
     597             : }
     598             : 
     599        3033 : PetscErrorCode NEPDeflationFunctionSolve(NEP_EXT_OP extop,Vec b,Vec x)
     600             : {
     601        3033 :   Vec               b1,x1;
     602        3033 :   PetscScalar       *xx,*bb,*x2,*b2,*w,*w2,snone=-1.0,sone=1.0,zero=0.0;
     603        3033 :   NEP_DEF_MATSHELL  *matctx;
     604        3033 :   NEP_DEF_FUN_SOLVE solve=extop->solve;
     605        3033 :   PetscBLASInt      one=1,szd_,n_,ldh_;
     606        3033 :   PetscInt          nloc,i;
     607        3033 :   PetscMPIInt       np,count;
     608             : 
     609        3033 :   PetscFunctionBegin;
     610        3033 :   if (extop->ref) PetscCall(VecZeroEntries(x));
     611        3033 :   if (extop->szd) {
     612        1956 :     x1 = solve->w[0]; b1 = solve->w[1];
     613        1956 :     PetscCall(VecGetArray(x,&xx));
     614        1956 :     PetscCall(VecPlaceArray(x1,xx));
     615        1956 :     PetscCall(VecGetArray(b,&bb));
     616        1956 :     PetscCall(VecPlaceArray(b1,bb));
     617             :   } else {
     618             :     b1 = b; x1 = x;
     619             :   }
     620        3033 :   PetscCall(KSPSolve(extop->solve->ksp,b1,x1));
     621        3033 :   if (!extop->ref && extop->n && extop->szd) {
     622         961 :     PetscCall(PetscBLASIntCast(extop->szd,&szd_));
     623         961 :     PetscCall(PetscBLASIntCast(extop->n,&n_));
     624         961 :     PetscCall(PetscBLASIntCast(extop->szd+1,&ldh_));
     625         961 :     PetscCall(BVGetSizes(extop->nep->V,&nloc,NULL,NULL));
     626         961 :     PetscCall(PetscMalloc2(extop->n,&b2,extop->n,&x2));
     627         961 :     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)b),&np));
     628        2336 :     for (i=0;i<extop->n;i++) b2[i] = bb[nloc+i]*PetscSqrtReal(np);
     629         961 :     w = solve->work; w2 = solve->work+extop->n;
     630         961 :     PetscCall(MatShellGetContext(solve->sincf?extop->MF:solve->T,&matctx));
     631         961 :     PetscCall(PetscArraycpy(w2,b2,extop->n));
     632         961 :     PetscCall(BVDotVec(extop->X,x1,w));
     633         961 :     PetscCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&snone,matctx->A,&szd_,w,&one,&sone,w2,&one));
     634         961 :     PetscCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,solve->M,&szd_,w2,&one,&zero,x2,&one));
     635         961 :     if (extop->simpU) {
     636         475 :       for (i=0;i<extop->n;i++) extop->H[i+i*(extop->szd+1)] -= solve->theta;
     637         475 :       for (i=0;i<extop->n;i++) w[i] = x2[i];
     638         208 :       PetscCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&one,&snone,extop->H,&ldh_,w,&n_));
     639         475 :       for (i=0;i<extop->n;i++) extop->H[i+i*(extop->szd+1)] += solve->theta;
     640         208 :       PetscCall(BVMultVec(extop->X,-1.0,1.0,x1,w));
     641         753 :     } else PetscCall(BVMultVec(solve->T_1U,-1.0,1.0,x1,x2));
     642        2336 :     for (i=0;i<extop->n;i++) xx[i+nloc] = x2[i]/PetscSqrtReal(np);
     643         961 :     PetscCall(PetscMPIIntCast(extop->n,&count));
     644        1922 :     PetscCallMPI(MPI_Bcast(xx+nloc,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)b)));
     645             :   }
     646        3033 :   if (extop->szd) {
     647        1956 :     PetscCall(VecResetArray(x1));
     648        1956 :     PetscCall(VecRestoreArray(x,&xx));
     649        1956 :     PetscCall(VecResetArray(b1));
     650        1956 :     PetscCall(VecRestoreArray(b,&bb));
     651        1956 :     if (!extop->ref && extop->n) PetscCall(PetscFree2(b2,x2));
     652             :   }
     653        3033 :   PetscFunctionReturn(PETSC_SUCCESS);
     654             : }
     655             : 
     656          95 : PetscErrorCode NEPDeflationSetRefine(NEP_EXT_OP extop,PetscBool ref)
     657             : {
     658          95 :   PetscFunctionBegin;
     659          95 :   extop->ref = ref;
     660          95 :   PetscFunctionReturn(PETSC_SUCCESS);
     661             : }
     662             : 
     663         130 : PetscErrorCode NEPDeflationReset(NEP_EXT_OP extop)
     664             : {
     665         130 :   PetscInt          j;
     666         130 :   NEP_DEF_FUN_SOLVE solve;
     667             : 
     668         130 :   PetscFunctionBegin;
     669         130 :   if (!extop) PetscFunctionReturn(PETSC_SUCCESS);
     670          65 :   PetscCall(PetscFree(extop->H));
     671          65 :   PetscCall(BVDestroy(&extop->X));
     672          65 :   if (extop->szd) {
     673          18 :     PetscCall(VecDestroy(&extop->w));
     674          18 :     PetscCall(PetscFree3(extop->Hj,extop->XpX,extop->bc));
     675          18 :     PetscCall(BVDestroy(&extop->W));
     676             :   }
     677          65 :   PetscCall(MatDestroy(&extop->MF));
     678          65 :   PetscCall(MatDestroy(&extop->MJ));
     679          65 :   if (extop->solve) {
     680          65 :     solve = extop->solve;
     681          65 :     if (extop->szd) {
     682          18 :       if (!extop->simpU) PetscCall(BVDestroy(&solve->T_1U));
     683          18 :       PetscCall(PetscFree2(solve->M,solve->work));
     684          18 :       PetscCall(VecDestroy(&solve->w[0]));
     685          18 :       PetscCall(VecDestroy(&solve->w[1]));
     686             :     }
     687          65 :     if (!solve->sincf) PetscCall(MatDestroy(&solve->T));
     688          65 :     PetscCall(PetscFree(extop->solve));
     689             :   }
     690          65 :   if (extop->proj) {
     691          17 :     if (extop->szd) {
     692          12 :       for (j=0;j<extop->nep->nt;j++) PetscCall(MatDestroy(&extop->proj->V1pApX[j]));
     693           3 :       PetscCall(MatDestroy(&extop->proj->XpV1));
     694           3 :       PetscCall(PetscFree3(extop->proj->V2,extop->proj->V1pApX,extop->proj->work));
     695           3 :       PetscCall(VecDestroy(&extop->proj->w));
     696           3 :       PetscCall(BVDestroy(&extop->proj->V1));
     697             :     }
     698          17 :     PetscCall(PetscFree(extop->proj));
     699             :   }
     700          65 :   PetscCall(PetscFree(extop));
     701          65 :   PetscFunctionReturn(PETSC_SUCCESS);
     702             : }
     703             : 
     704          65 : PetscErrorCode NEPDeflationInitialize(NEP nep,BV X,KSP ksp,PetscBool sincfun,PetscInt sz,NEP_EXT_OP *extop)
     705             : {
     706          65 :   NEP_EXT_OP        op;
     707          65 :   NEP_DEF_FUN_SOLVE solve;
     708          65 :   PetscInt          szd;
     709          65 :   Vec               x;
     710             : 
     711          65 :   PetscFunctionBegin;
     712          65 :   PetscCall(NEPDeflationReset(*extop));
     713          65 :   PetscCall(PetscNew(&op));
     714          65 :   *extop  = op;
     715          65 :   op->nep = nep;
     716          65 :   op->n   = 0;
     717          65 :   op->szd = szd = sz-1;
     718          65 :   op->max_midx = PetscMin(MAX_MINIDX,szd);
     719          65 :   op->X = X;
     720          65 :   if (!X) PetscCall(BVDuplicateResize(nep->V,sz,&op->X));
     721          65 :   else PetscCall(PetscObjectReference((PetscObject)X));
     722          65 :   PetscCall(PetscCalloc1(sz*sz,&(op)->H));
     723          65 :   if (op->szd) {
     724          18 :     PetscCall(BVGetColumn(op->X,0,&x));
     725          18 :     PetscCall(VecDuplicate(x,&op->w));
     726          18 :     PetscCall(BVRestoreColumn(op->X,0,&x));
     727          18 :     op->simpU = PETSC_FALSE;
     728          18 :     if (nep->fui==NEP_USER_INTERFACE_SPLIT) {
     729             :       /* undocumented option to use the simple expression for U = T*X*inv(lambda-H) */
     730          16 :       PetscCall(PetscOptionsGetBool(NULL,NULL,"-nep_deflation_simpleu",&op->simpU,NULL));
     731             :     } else {
     732           2 :       op->simpU = PETSC_TRUE;
     733             :     }
     734          18 :     PetscCall(PetscCalloc3(szd*szd*op->max_midx,&(op)->Hj,szd*szd,&(op)->XpX,szd,&op->bc));
     735          18 :     PetscCall(BVDuplicateResize(op->X,op->szd,&op->W));
     736             :   }
     737          65 :   if (ksp) {
     738          65 :     PetscCall(PetscNew(&solve));
     739          65 :     op->solve    = solve;
     740          65 :     solve->ksp   = ksp;
     741          65 :     solve->sincf = sincfun;
     742          65 :     solve->n     = -1;
     743          65 :     if (op->szd) {
     744          18 :       if (!op->simpU) PetscCall(BVDuplicateResize(nep->V,szd,&solve->T_1U));
     745          18 :       PetscCall(PetscMalloc2(szd*szd,&solve->M,2*szd*szd,&solve->work));
     746          18 :       PetscCall(BVCreateVec(nep->V,&solve->w[0]));
     747          18 :       PetscCall(VecDuplicate(solve->w[0],&solve->w[1]));
     748             :     }
     749             :   }
     750          65 :   PetscFunctionReturn(PETSC_SUCCESS);
     751             : }
     752             : 
     753         959 : static PetscErrorCode NEPDeflationDSNEPComputeMatrix(DS ds,PetscScalar lambda,PetscBool deriv,DSMatType mat,void *ctx)
     754             : {
     755         959 :   Mat               A,Ei;
     756         959 :   PetscScalar       *T,*w1,*w2,*w=NULL,*ww,*hH,*hHprev,*pts;
     757         959 :   PetscScalar       alpha,alpha2,*AB,sone=1.0,zero=0.0,*basisv,s;
     758         959 :   const PetscScalar *E;
     759         959 :   PetscInt          i,ldds,nwork=0,szd,nv,j,k,n;
     760         959 :   PetscBLASInt      inc=1,nv_,ldds_,dim_,szdk,szd_,n_,ldh_;
     761         959 :   PetscMPIInt       np;
     762         959 :   NEP_DEF_PROJECT   proj=(NEP_DEF_PROJECT)ctx;
     763         959 :   NEP_EXT_OP        extop=proj->extop;
     764         959 :   NEP               nep=extop->nep;
     765             : 
     766         959 :   PetscFunctionBegin;
     767         959 :   PetscCall(DSGetDimensions(ds,&nv,NULL,NULL,NULL));
     768         959 :   PetscCall(DSGetLeadingDimension(ds,&ldds));
     769         959 :   PetscCall(DSGetMat(ds,mat,&A));
     770         959 :   PetscCall(MatZeroEntries(A));
     771             :   /* mat = V1^*T(lambda)V1 */
     772        3836 :   for (i=0;i<nep->nt;i++) {
     773        2877 :     if (deriv) PetscCall(FNEvaluateDerivative(nep->f[i],lambda,&alpha));
     774        1713 :     else PetscCall(FNEvaluateFunction(nep->f[i],lambda,&alpha));
     775        2877 :     PetscCall(DSGetMat(ds,DSMatExtra[i],&Ei));
     776        2877 :     PetscCall(MatAXPY(A,alpha,Ei,SAME_NONZERO_PATTERN));
     777        2877 :     PetscCall(DSRestoreMat(ds,DSMatExtra[i],&Ei));
     778             :   }
     779         959 :   PetscCall(DSRestoreMat(ds,mat,&A));
     780         959 :   if (!extop->ref && extop->n) {
     781         240 :     PetscCall(DSGetArray(ds,mat,&T));
     782         240 :     n = extop->n;
     783         240 :     szd = extop->szd;
     784         240 :     PetscCall(PetscArrayzero(proj->work,proj->lwork));
     785         240 :     PetscCall(PetscBLASIntCast(nv,&nv_));
     786         240 :     PetscCall(PetscBLASIntCast(n,&n_));
     787         240 :     PetscCall(PetscBLASIntCast(ldds,&ldds_));
     788         240 :     PetscCall(PetscBLASIntCast(szd,&szd_));
     789         240 :     PetscCall(PetscBLASIntCast(proj->dim,&dim_));
     790         240 :     PetscCall(PetscBLASIntCast(extop->szd+1,&ldh_));
     791         240 :     w1 = proj->work; w2 = proj->work+proj->dim*proj->dim;
     792         240 :     nwork += 2*proj->dim*proj->dim;
     793             : 
     794             :     /* mat = mat + V1^*U(lambda)V2 */
     795         960 :     for (i=0;i<nep->nt;i++) {
     796         720 :       if (extop->simpU) {
     797         240 :         if (deriv) PetscCall(FNEvaluateDerivative(nep->f[i],lambda,&alpha));
     798         144 :         else PetscCall(FNEvaluateFunction(nep->f[i],lambda,&alpha));
     799         240 :         ww = w1; w = w2;
     800         240 :         PetscCall(PetscArraycpy(ww,proj->V2,szd*nv));
     801         240 :         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)ds),&np));
     802        2430 :         for (j=0;j<szd*nv;j++) ww[j] *= PetscSqrtReal(np);
     803         585 :         for (j=0;j<n;j++) extop->H[j*ldh_+j] -= lambda;
     804         240 :         alpha = -alpha;
     805         240 :         PetscCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha,extop->H,&ldh_,ww,&szd_));
     806         240 :         if (deriv) {
     807          96 :           PetscCall(PetscBLASIntCast(szd*nv,&szdk));
     808          96 :           PetscCall(FNEvaluateFunction(nep->f[i],lambda,&alpha2));
     809          96 :           PetscCall(PetscArraycpy(w,proj->V2,szd*nv));
     810         972 :           for (j=0;j<szd*nv;j++) w[j] *= PetscSqrtReal(np);
     811          96 :           alpha2 = -alpha2;
     812          96 :           PetscCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha2,extop->H,&ldh_,w,&szd_));
     813          96 :           alpha2 = 1.0;
     814          96 :           PetscCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha2,extop->H,&ldh_,w,&szd_));
     815          96 :           PetscCallBLAS("BLASaxpy",BLASaxpy_(&szdk,&sone,w,&inc,ww,&inc));
     816             :         }
     817         585 :         for (j=0;j<n;j++) extop->H[j*ldh_+j] += lambda;
     818             :       } else {
     819         480 :         PetscCall(NEPDeflationEvaluateHatFunction(extop,i,lambda,NULL,w1,w2,szd));
     820         762 :         w = deriv?w2:w1; ww = deriv?w1:w2;
     821         480 :         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)ds),&np));
     822         480 :         s = PetscSqrtReal(np);
     823         480 :         PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&s,w,&szd_,proj->V2,&szd_,&zero,ww,&szd_));
     824             :       }
     825         720 :       PetscCall(MatDenseGetArrayRead(proj->V1pApX[i],&E));
     826         720 :       PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&nv_,&nv_,&n_,&sone,E,&dim_,ww,&szd_,&sone,T,&ldds_));
     827         720 :       PetscCall(MatDenseRestoreArrayRead(proj->V1pApX[i],&E));
     828             :     }
     829             : 
     830             :     /* mat = mat + V2^*A(lambda)V1 */
     831         240 :     basisv = proj->work+nwork; nwork += szd;
     832         240 :     hH     = proj->work+nwork; nwork += szd*szd;
     833         240 :     hHprev = proj->work+nwork; nwork += szd*szd;
     834         240 :     AB     = proj->work+nwork;
     835         240 :     PetscCall(NEPDeflationEvaluateBasis(extop,lambda,n,basisv,deriv));
     836         446 :     if (!deriv) for (i=0;i<n;i++) AB[i*(szd+1)] = 1.0;
     837         588 :     for (j=0;j<n;j++)
     838         912 :       for (i=0;i<n;i++)
     839         564 :         for (k=1;k<extop->midx;k++) AB[j*szd+i] += basisv[k]*PetscConj(extop->Hj[k*szd*szd+i*szd+j]);
     840         240 :     PetscCall(MatDenseGetArrayRead(proj->XpV1,&E));
     841         240 :     PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&sone,AB,&szd_,E,&szd_,&zero,w,&szd_));
     842         240 :     PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&nv_,&nv_,&n_,&sone,proj->V2,&szd_,w,&szd_,&sone,T,&ldds_));
     843         240 :     PetscCall(MatDenseRestoreArrayRead(proj->XpV1,&E));
     844             : 
     845             :     /* mat = mat + V2^*B(lambda)V2 */
     846         240 :     PetscCall(PetscArrayzero(AB,szd*szd));
     847         240 :     for (i=1;i<extop->midx;i++) {
     848           0 :       PetscCall(NEPDeflationEvaluateBasisMat(extop,i,PETSC_TRUE,basisv,hH,hHprev));
     849           0 :       PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->XpX,&szd_,hH,&szd_,&zero,hHprev,&szd_));
     850           0 :       PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&n_,&n_,&n_,&sone,extop->Hj+szd*szd*i,&szd_,hHprev,&szd_,&sone,AB,&szd_));
     851           0 :       pts = hHprev; hHprev = hH; hH = pts;
     852             :     }
     853         240 :     PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&sone,AB,&szd_,proj->V2,&szd_,&zero,w,&szd_));
     854         240 :     PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&nv_,&nv_,&n_,&sone,proj->V2,&szd_,w,&szd_,&sone,T,&ldds_));
     855         240 :     PetscCall(DSRestoreArray(ds,mat,&T));
     856             :   }
     857         959 :   PetscFunctionReturn(PETSC_SUCCESS);
     858             : }
     859             : 
     860         183 : PetscErrorCode NEPDeflationProjectOperator(NEP_EXT_OP extop,BV Vext,DS ds,PetscInt j0,PetscInt j1)
     861             : {
     862         183 :   PetscInt        k,j,n=extop->n,dim;
     863         183 :   Vec             v,ve;
     864         183 :   BV              V1;
     865         183 :   Mat             G;
     866         183 :   NEP             nep=extop->nep;
     867         183 :   NEP_DEF_PROJECT proj;
     868             : 
     869         183 :   PetscFunctionBegin;
     870         183 :   NEPCheckSplit(extop->nep,1);
     871         183 :   proj = extop->proj;
     872         183 :   if (!proj) {
     873             :     /* Initialize the projection data structure */
     874          17 :     PetscCall(PetscNew(&proj));
     875          17 :     extop->proj = proj;
     876          17 :     proj->extop = extop;
     877          17 :     PetscCall(BVGetSizes(Vext,NULL,NULL,&dim));
     878          17 :     proj->dim = dim;
     879          17 :     if (extop->szd) {
     880           3 :       proj->lwork = 3*dim*dim+2*extop->szd*extop->szd+extop->szd;
     881           3 :       PetscCall(PetscMalloc3(dim*extop->szd,&proj->V2,nep->nt,&proj->V1pApX,proj->lwork,&proj->work));
     882          12 :       for (j=0;j<nep->nt;j++) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,proj->dim,extop->szd,NULL,&proj->V1pApX[j]));
     883           3 :       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,extop->szd,proj->dim,NULL,&proj->XpV1));
     884           3 :       PetscCall(BVCreateVec(extop->X,&proj->w));
     885           3 :       PetscCall(BVDuplicateResize(extop->X,proj->dim,&proj->V1));
     886             :     }
     887          17 :     PetscCall(DSNEPSetComputeMatrixFunction(ds,NEPDeflationDSNEPComputeMatrix,(void*)proj));
     888             :   }
     889             : 
     890             :   /* Split Vext in V1 and V2 */
     891         183 :   if (extop->szd) {
     892         136 :     for (j=j0;j<j1;j++) {
     893          68 :       PetscCall(BVGetColumn(Vext,j,&ve));
     894          68 :       PetscCall(BVGetColumn(proj->V1,j,&v));
     895          68 :       PetscCall(NEPDeflationCopyToExtendedVec(extop,v,proj->V2+j*extop->szd,ve,PETSC_TRUE));
     896          68 :       PetscCall(BVRestoreColumn(proj->V1,j,&v));
     897          68 :       PetscCall(BVRestoreColumn(Vext,j,&ve));
     898             :     }
     899          68 :     V1 = proj->V1;
     900             :   } else V1 = Vext;
     901             : 
     902             :   /* Compute matrices V1^* A_i V1 */
     903         183 :   PetscCall(BVSetActiveColumns(V1,j0,j1));
     904         732 :   for (k=0;k<nep->nt;k++) {
     905         549 :     PetscCall(DSGetMat(ds,DSMatExtra[k],&G));
     906         549 :     PetscCall(BVMatProject(V1,nep->A[k],V1,G));
     907         549 :     PetscCall(DSRestoreMat(ds,DSMatExtra[k],&G));
     908             :   }
     909             : 
     910         183 :   if (extop->n) {
     911          44 :     if (extop->szd) {
     912             :       /* Compute matrices V1^* A_i X  and V1^* X */
     913          44 :       PetscCall(BVSetActiveColumns(extop->W,0,n));
     914         176 :       for (k=0;k<nep->nt;k++) {
     915         132 :         PetscCall(BVMatMult(extop->X,nep->A[k],extop->W));
     916         132 :         PetscCall(BVDot(extop->W,V1,proj->V1pApX[k]));
     917             :       }
     918          44 :       PetscCall(BVDot(V1,extop->X,proj->XpV1));
     919             :     }
     920             :   }
     921         183 :   PetscFunctionReturn(PETSC_SUCCESS);
     922             : }

Generated by: LCOV version 1.14