LCOV - code coverage report
Current view: top level - nep/impls - nepdefl.c (source / functions) Hit Total Coverage
Test: SLEPc Lines: 686 721 95.1 %
Date: 2024-04-25 00:29:53 Functions: 23 24 95.8 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
       3             :    SLEPc - Scalable Library for Eigenvalue Problem Computations
       4             :    Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
       5             : 
       6             :    This file is part of SLEPc.
       7             :    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
       8             :    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
       9             : */
      10             : 
      11             : #include <slepc/private/nepimpl.h>         /*I "slepcnep.h" I*/
      12             : #include <slepcblaslapack.h>
      13             : #include "nepdefl.h"
      14             : 
      15          66 : PetscErrorCode NEPDeflationGetInvariantPair(NEP_EXT_OP extop,BV *X,Mat *H)
      16             : {
      17          66 :   PetscFunctionBegin;
      18          66 :   if (X) *X = extop->X;
      19          66 :   if (H) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,extop->szd+1,extop->szd+1,extop->H,H));
      20          66 :   PetscFunctionReturn(PETSC_SUCCESS);
      21             : }
      22             : 
      23         125 : static PetscErrorCode NEPDeflationExtendInvariantPair(NEP_EXT_OP extop,Vec u,PetscScalar lambda,PetscInt k)
      24             : {
      25         125 :   Vec            uu;
      26         125 :   PetscInt       ld,i;
      27         125 :   PetscMPIInt    np;
      28         125 :   PetscReal      norm;
      29             : 
      30         125 :   PetscFunctionBegin;
      31         125 :   PetscCall(BVGetColumn(extop->X,k,&uu));
      32         125 :   ld = extop->szd+1;
      33         125 :   PetscCall(NEPDeflationCopyToExtendedVec(extop,uu,extop->H+k*ld,u,PETSC_TRUE));
      34         125 :   PetscCall(BVRestoreColumn(extop->X,k,&uu));
      35         125 :   PetscCall(BVNormColumn(extop->X,k,NORM_2,&norm));
      36         125 :   PetscCall(BVScaleColumn(extop->X,k,1.0/norm));
      37         125 :   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)u),&np));
      38         216 :   for (i=0;i<k;i++) extop->H[k*ld+i] *= PetscSqrtReal(np)/norm;
      39         125 :   extop->H[k*(ld+1)] = lambda;
      40         125 :   PetscFunctionReturn(PETSC_SUCCESS);
      41             : }
      42             : 
      43          23 : PetscErrorCode NEPDeflationExtractEigenpair(NEP_EXT_OP extop,PetscInt k,Vec u,PetscScalar lambda,DS ds)
      44             : {
      45          23 :   Mat            A,H;
      46          23 :   PetscInt       ldh=extop->szd+1,ldds,k1=k+1;
      47          23 :   PetscScalar    *eigr,*eigi,*t,*Z;
      48          23 :   Vec            x;
      49             : 
      50          23 :   PetscFunctionBegin;
      51          23 :   PetscCall(NEPDeflationExtendInvariantPair(extop,u,lambda,k));
      52          23 :   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,k1,k1,extop->H,&H));
      53          23 :   PetscCall(MatDenseSetLDA(H,ldh));
      54          23 :   PetscCall(PetscCalloc3(k1,&eigr,k1,&eigi,extop->szd,&t));
      55          23 :   PetscCall(DSReset(ds));
      56          23 :   PetscCall(DSSetType(ds,DSNHEP));
      57          23 :   PetscCall(DSAllocate(ds,ldh));
      58          23 :   PetscCall(DSGetLeadingDimension(ds,&ldds));
      59          23 :   PetscCall(DSSetDimensions(ds,k1,0,k1));
      60          23 :   PetscCall(DSGetMat(ds,DS_MAT_A,&A));
      61          23 :   PetscCall(MatCopy(H,A,SAME_NONZERO_PATTERN));
      62          23 :   PetscCall(DSRestoreMat(ds,DS_MAT_A,&A));
      63          23 :   PetscCall(MatDestroy(&H));
      64          23 :   PetscCall(DSSolve(ds,eigr,eigi));
      65          23 :   PetscCall(DSVectors(ds,DS_MAT_X,&k,NULL));
      66          23 :   PetscCall(DSGetArray(ds,DS_MAT_X,&Z));
      67          23 :   PetscCall(BVMultColumn(extop->X,1.0,Z[k*ldds+k],k,Z+k*ldds));
      68          23 :   PetscCall(DSRestoreArray(ds,DS_MAT_X,&Z));
      69          23 :   PetscCall(BVGetColumn(extop->X,k,&x));
      70          23 :   PetscCall(NEPDeflationCopyToExtendedVec(extop,x,t,u,PETSC_FALSE));
      71          23 :   PetscCall(BVRestoreColumn(extop->X,k,&x));
      72          23 :   PetscCall(PetscFree3(eigr,eigi,t));
      73          23 :   PetscFunctionReturn(PETSC_SUCCESS);
      74             : }
      75             : 
      76         275 : PetscErrorCode NEPDeflationCopyToExtendedVec(NEP_EXT_OP extop,Vec v,PetscScalar *a,Vec vex,PetscBool back)
      77             : {
      78         275 :   PetscMPIInt    np,rk,count;
      79         275 :   PetscScalar    *array1,*array2;
      80         275 :   PetscInt       nloc;
      81             : 
      82         275 :   PetscFunctionBegin;
      83         275 :   if (extop->szd) {
      84         181 :     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)vex),&rk));
      85         181 :     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)vex),&np));
      86         181 :     PetscCall(BVGetSizes(extop->nep->V,&nloc,NULL,NULL));
      87         181 :     if (v) {
      88         181 :       PetscCall(VecGetArray(v,&array1));
      89         181 :       PetscCall(VecGetArray(vex,&array2));
      90         181 :       if (back) PetscCall(PetscArraycpy(array1,array2,nloc));
      91          42 :       else PetscCall(PetscArraycpy(array2,array1,nloc));
      92         181 :       PetscCall(VecRestoreArray(v,&array1));
      93         181 :       PetscCall(VecRestoreArray(vex,&array2));
      94             :     }
      95         181 :     if (a) {
      96         162 :       PetscCall(VecGetArray(vex,&array2));
      97         162 :       if (back) {
      98         139 :         PetscCall(PetscArraycpy(a,array2+nloc,extop->szd));
      99         139 :         PetscCall(PetscMPIIntCast(extop->szd,&count));
     100         278 :         PetscCallMPI(MPI_Bcast(a,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)vex)));
     101             :       } else {
     102          23 :         PetscCall(PetscArraycpy(array2+nloc,a,extop->szd));
     103          23 :         PetscCall(PetscMPIIntCast(extop->szd,&count));
     104          46 :         PetscCallMPI(MPI_Bcast(array2+nloc,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)vex)));
     105             :       }
     106         162 :       PetscCall(VecRestoreArray(vex,&array2));
     107             :     }
     108             :   } else {
     109          94 :     if (back) PetscCall(VecCopy(vex,v));
     110          47 :     else PetscCall(VecCopy(v,vex));
     111             :   }
     112         275 :   PetscFunctionReturn(PETSC_SUCCESS);
     113             : }
     114             : 
     115          74 : PetscErrorCode NEPDeflationCreateVec(NEP_EXT_OP extop,Vec *v)
     116             : {
     117          74 :   PetscInt       nloc;
     118          74 :   Vec            u;
     119          74 :   VecType        type;
     120             : 
     121          74 :   PetscFunctionBegin;
     122          74 :   if (extop->szd) {
     123          26 :     PetscCall(BVGetColumn(extop->nep->V,0,&u));
     124          26 :     PetscCall(VecGetType(u,&type));
     125          26 :     PetscCall(BVRestoreColumn(extop->nep->V,0,&u));
     126          26 :     PetscCall(VecCreate(PetscObjectComm((PetscObject)extop->nep),v));
     127          26 :     PetscCall(VecSetType(*v,type));
     128          26 :     PetscCall(BVGetSizes(extop->nep->V,&nloc,NULL,NULL));
     129          26 :     nloc += extop->szd;
     130          26 :     PetscCall(VecSetSizes(*v,nloc,PETSC_DECIDE));
     131          48 :   } else PetscCall(BVCreateVec(extop->nep->V,v));
     132          74 :   PetscFunctionReturn(PETSC_SUCCESS);
     133             : }
     134             : 
     135          17 : PetscErrorCode NEPDeflationCreateBV(NEP_EXT_OP extop,PetscInt sz,BV *V)
     136             : {
     137          17 :   PetscInt           nloc;
     138          17 :   BVType             type;
     139          17 :   BVOrthogType       otype;
     140          17 :   BVOrthogRefineType oref;
     141          17 :   PetscReal          oeta;
     142          17 :   BVOrthogBlockType  oblock;
     143          17 :   NEP                nep=extop->nep;
     144          17 :   VecType            vtype;
     145             : 
     146          17 :   PetscFunctionBegin;
     147          17 :   if (extop->szd) {
     148           3 :     PetscCall(BVGetSizes(nep->V,&nloc,NULL,NULL));
     149           3 :     PetscCall(BVCreate(PetscObjectComm((PetscObject)nep),V));
     150           3 :     PetscCall(BVSetSizes(*V,nloc+extop->szd,PETSC_DECIDE,sz));
     151           3 :     PetscCall(BVGetType(nep->V,&type));
     152           3 :     PetscCall(BVSetType(*V,type));
     153           3 :     PetscCall(BVGetVecType(nep->V,&vtype));
     154           3 :     PetscCall(BVSetVecType(*V,vtype));
     155           3 :     PetscCall(BVGetOrthogonalization(nep->V,&otype,&oref,&oeta,&oblock));
     156           3 :     PetscCall(BVSetOrthogonalization(*V,otype,oref,oeta,oblock));
     157           3 :     PetscCall(PetscObjectStateIncrease((PetscObject)*V));
     158          14 :   } else PetscCall(BVDuplicateResize(nep->V,sz,V));
     159          17 :   PetscFunctionReturn(PETSC_SUCCESS);
     160             : }
     161             : 
     162          17 : PetscErrorCode NEPDeflationSetRandomVec(NEP_EXT_OP extop,Vec v)
     163             : {
     164          17 :   PetscInt       n,next,i;
     165          17 :   PetscRandom    rand;
     166          17 :   PetscScalar    *array;
     167          17 :   PetscMPIInt    nn,np;
     168             : 
     169          17 :   PetscFunctionBegin;
     170          17 :   PetscCall(BVGetRandomContext(extop->nep->V,&rand));
     171          17 :   PetscCall(VecSetRandom(v,rand));
     172          17 :   if (extop->szd) {
     173          17 :     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)v),&np));
     174          17 :     PetscCall(BVGetSizes(extop->nep->V,&n,NULL,NULL));
     175          17 :     PetscCall(VecGetLocalSize(v,&next));
     176          17 :     PetscCall(VecGetArray(v,&array));
     177          25 :     for (i=n+extop->n;i<next;i++) array[i] = 0.0;
     178          42 :     for (i=n;i<n+extop->n;i++) array[i] /= PetscSqrtReal(np);
     179          17 :     PetscCall(PetscMPIIntCast(extop->n,&nn));
     180          34 :     PetscCallMPI(MPI_Bcast(array+n,nn,MPIU_SCALAR,0,PetscObjectComm((PetscObject)v)));
     181          17 :     PetscCall(VecRestoreArray(v,&array));
     182             :   }
     183          17 :   PetscFunctionReturn(PETSC_SUCCESS);
     184             : }
     185             : 
     186          36 : static PetscErrorCode NEPDeflationEvaluateBasisMat(NEP_EXT_OP extop,PetscInt idx,PetscBool hat,PetscScalar *bval,PetscScalar *Hj,PetscScalar *Hjprev)
     187             : {
     188          36 :   PetscInt       i,k,n=extop->n,ldhj=extop->szd,ldh=extop->szd+1;
     189          36 :   PetscScalar    sone=1.0,zero=0.0;
     190          36 :   PetscBLASInt   ldh_,ldhj_,n_;
     191             : 
     192          36 :   PetscFunctionBegin;
     193          36 :   i = (idx<0)?extop->szd*extop->szd*(-idx):extop->szd*extop->szd;
     194          36 :   PetscCall(PetscArrayzero(Hj,i));
     195          36 :   PetscCall(PetscBLASIntCast(ldhj+1,&ldh_));
     196          36 :   PetscCall(PetscBLASIntCast(ldhj,&ldhj_));
     197          36 :   PetscCall(PetscBLASIntCast(n,&n_));
     198          36 :   if (idx<1) {
     199          91 :     if (!hat) for (i=0;i<extop->n;i++) Hj[i+i*ldhj] = 1.0;
     200           0 :     else for (i=0;i<extop->n;i++) Hj[i+i*ldhj] = 0.0;
     201             :   } else {
     202           0 :       for (i=0;i<n;i++) extop->H[i*ldh+i] -= extop->bc[idx-1];
     203           0 :       PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->H,&ldh_,Hjprev,&ldhj_,&zero,Hj,&ldhj_));
     204           0 :       for (i=0;i<n;i++) extop->H[i*ldh+i] += extop->bc[idx-1];
     205           0 :       if (hat) for (i=0;i<n;i++) Hj[i*(ldhj+1)] += bval[idx-1];
     206             :   }
     207          36 :   if (idx<0) {
     208          36 :     idx = -idx;
     209          36 :     for (k=1;k<idx;k++) {
     210           0 :       for (i=0;i<n;i++) extop->H[i*ldh+i] -= extop->bc[k-1];
     211           0 :       PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->H,&ldh_,Hj+(k-1)*ldhj*ldhj,&ldhj_,&zero,Hj+k*ldhj*ldhj,&ldhj_));
     212           0 :       for (i=0;i<n;i++) extop->H[i*ldh+i] += extop->bc[k-1];
     213           0 :       if (hat) for (i=0;i<n;i++) Hj[i*(ldhj+1)] += bval[k-1];
     214             :     }
     215             :   }
     216          36 :   PetscFunctionReturn(PETSC_SUCCESS);
     217             : }
     218             : 
     219         102 : PetscErrorCode NEPDeflationLocking(NEP_EXT_OP extop,Vec u,PetscScalar lambda)
     220             : {
     221         102 :   PetscInt       i;
     222             : 
     223         102 :   PetscFunctionBegin;
     224         102 :   PetscCall(NEPDeflationExtendInvariantPair(extop,u,lambda,extop->n));
     225         102 :   extop->n++;
     226         102 :   PetscCall(BVSetActiveColumns(extop->X,0,extop->n));
     227         102 :   if (extop->n <= extop->szd) {
     228             :     /* update XpX */
     229          36 :     PetscCall(BVDotColumn(extop->X,extop->n-1,extop->XpX+(extop->n-1)*extop->szd));
     230          36 :     extop->XpX[(extop->n-1)*(1+extop->szd)] = 1.0;
     231          55 :     for (i=0;i<extop->n-1;i++) extop->XpX[i*extop->szd+extop->n-1] = PetscConj(extop->XpX[(extop->n-1)*extop->szd+i]);
     232             :     /* determine minimality index */
     233          36 :     extop->midx = PetscMin(extop->max_midx,extop->n);
     234             :     /* polynominal basis coefficients */
     235          72 :     for (i=0;i<extop->midx;i++) extop->bc[i] = extop->nep->target;
     236             :     /* evaluate the polynomial basis in H */
     237          36 :     PetscCall(NEPDeflationEvaluateBasisMat(extop,-extop->midx,PETSC_FALSE,NULL,extop->Hj,NULL));
     238             :   }
     239         102 :   PetscFunctionReturn(PETSC_SUCCESS);
     240             : }
     241             : 
     242         791 : static PetscErrorCode NEPDeflationEvaluateHatFunction(NEP_EXT_OP extop, PetscInt idx,PetscScalar lambda,PetscScalar *y,PetscScalar *hfj,PetscScalar *hfjp,PetscInt ld)
     243             : {
     244         791 :   PetscInt          i,j,k,off,ini,fin,sz,ldh,n=extop->n;
     245         791 :   Mat               A,B;
     246         791 :   PetscScalar       *array;
     247         791 :   const PetscScalar *barray;
     248             : 
     249         791 :   PetscFunctionBegin;
     250         791 :   if (idx<0) {ini = 0; fin = extop->nep->nt;}
     251         471 :   else {ini = idx; fin = idx+1;}
     252         791 :   if (y) sz = hfjp?n+2:n+1;
     253         791 :   else sz = hfjp?3*n:2*n;
     254         791 :   ldh = extop->szd+1;
     255         791 :   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,sz,sz,NULL,&A));
     256         791 :   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,sz,sz,NULL,&B));
     257         791 :   PetscCall(MatDenseGetArray(A,&array));
     258        1969 :   for (j=0;j<n;j++)
     259        3142 :     for (i=0;i<n;i++) array[j*sz+i] = extop->H[j*ldh+i];
     260         791 :   PetscCall(MatDenseRestoreArrayWrite(A,&array));
     261         791 :   if (y) {
     262           0 :     PetscCall(MatDenseGetArray(A,&array));
     263           0 :     array[extop->n*(sz+1)] = lambda;
     264           0 :     if (hfjp) { array[(n+1)*sz+n] = 1.0; array[(n+1)*sz+n+1] = lambda;}
     265           0 :     for (i=0;i<n;i++) array[n*sz+i] = y[i];
     266           0 :     PetscCall(MatDenseRestoreArrayWrite(A,&array));
     267           0 :     for (j=ini;j<fin;j++) {
     268           0 :       PetscCall(FNEvaluateFunctionMat(extop->nep->f[j],A,B));
     269           0 :       PetscCall(MatDenseGetArrayRead(B,&barray));
     270           0 :       for (i=0;i<n;i++) hfj[j*ld+i] = barray[n*sz+i];
     271           0 :       if (hfjp) for (i=0;i<n;i++) hfjp[j*ld+i] = barray[(n+1)*sz+i];
     272           0 :       PetscCall(MatDenseRestoreArrayRead(B,&barray));
     273             :     }
     274             :   } else {
     275         791 :     off = idx<0?ld*n:0;
     276         791 :     PetscCall(MatDenseGetArray(A,&array));
     277        1969 :     for (k=0;k<n;k++) {
     278        1178 :       array[(n+k)*sz+k] = 1.0;
     279        1178 :       array[(n+k)*sz+n+k] = lambda;
     280             :     }
     281        1969 :     if (hfjp) for (k=0;k<n;k++) {
     282        1178 :       array[(2*n+k)*sz+n+k] = 1.0;
     283        1178 :       array[(2*n+k)*sz+2*n+k] = lambda;
     284             :     }
     285         791 :     PetscCall(MatDenseRestoreArray(A,&array));
     286        2219 :     for (j=ini;j<fin;j++) {
     287        1428 :       PetscCall(FNEvaluateFunctionMat(extop->nep->f[j],A,B));
     288        1428 :       PetscCall(MatDenseGetArrayRead(B,&barray));
     289        6996 :       for (i=0;i<n;i++) for (k=0;k<n;k++) hfj[j*off+i*ld+k] = barray[n*sz+i*sz+k];
     290        6996 :       if (hfjp) for (k=0;k<n;k++) for (i=0;i<n;i++) hfjp[j*off+i*ld+k] = barray[2*n*sz+i*sz+k];
     291        1428 :       PetscCall(MatDenseRestoreArrayRead(B,&barray));
     292             :     }
     293             :   }
     294         791 :   PetscCall(MatDestroy(&A));
     295         791 :   PetscCall(MatDestroy(&B));
     296         791 :   PetscFunctionReturn(PETSC_SUCCESS);
     297             : }
     298             : 
     299        4042 : static PetscErrorCode MatMult_NEPDeflation(Mat M,Vec x,Vec y)
     300             : {
     301        4042 :   NEP_DEF_MATSHELL  *matctx;
     302        4042 :   NEP_EXT_OP        extop;
     303        4042 :   Vec               x1,y1;
     304        4042 :   PetscScalar       *yy,sone=1.0,zero=0.0;
     305        4042 :   const PetscScalar *xx;
     306        4042 :   PetscInt          nloc,i;
     307        4042 :   PetscMPIInt       np;
     308        4042 :   PetscBLASInt      n_,one=1,szd_;
     309             : 
     310        4042 :   PetscFunctionBegin;
     311        4042 :   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)M),&np));
     312        4042 :   PetscCall(MatShellGetContext(M,&matctx));
     313        4042 :   extop = matctx->extop;
     314        4042 :   if (extop->ref) PetscCall(VecZeroEntries(y));
     315        4042 :   if (extop->szd) {
     316        2271 :     x1 = matctx->w[0]; y1 = matctx->w[1];
     317        2271 :     PetscCall(VecGetArrayRead(x,&xx));
     318        2271 :     PetscCall(VecPlaceArray(x1,xx));
     319        2271 :     PetscCall(VecGetArray(y,&yy));
     320        2271 :     PetscCall(VecPlaceArray(y1,yy));
     321        2271 :     PetscCall(MatMult(matctx->T,x1,y1));
     322        2271 :     if (!extop->ref && extop->n) {
     323        1187 :       PetscCall(VecGetLocalSize(x1,&nloc));
     324             :       /* copy for avoiding warning of constant array xx */
     325        2849 :       for (i=0;i<extop->n;i++) matctx->work[i] = xx[nloc+i]*PetscSqrtReal(np);
     326        1187 :       PetscCall(BVMultVec(matctx->U,1.0,1.0,y1,matctx->work));
     327        1187 :       PetscCall(BVDotVec(extop->X,x1,matctx->work));
     328        1187 :       PetscCall(PetscBLASIntCast(extop->n,&n_));
     329        1187 :       PetscCall(PetscBLASIntCast(extop->szd,&szd_));
     330        1187 :       PetscCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,matctx->A,&szd_,matctx->work,&one,&zero,yy+nloc,&one));
     331        1187 :       PetscCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,matctx->B,&szd_,xx+nloc,&one,&sone,yy+nloc,&one));
     332        2849 :       for (i=0;i<extop->n;i++) yy[nloc+i] /= PetscSqrtReal(np);
     333             :     }
     334        2271 :     PetscCall(VecResetArray(x1));
     335        2271 :     PetscCall(VecRestoreArrayRead(x,&xx));
     336        2271 :     PetscCall(VecResetArray(y1));
     337        2271 :     PetscCall(VecRestoreArray(y,&yy));
     338        1771 :   } else PetscCall(MatMult(matctx->T,x,y));
     339        4042 :   PetscFunctionReturn(PETSC_SUCCESS);
     340             : }
     341             : 
     342           0 : static PetscErrorCode MatCreateVecs_NEPDeflation(Mat M,Vec *right,Vec *left)
     343             : {
     344           0 :   NEP_DEF_MATSHELL *matctx;
     345             : 
     346           0 :   PetscFunctionBegin;
     347           0 :   PetscCall(MatShellGetContext(M,&matctx));
     348           0 :   if (right) PetscCall(VecDuplicate(matctx->w[0],right));
     349           0 :   if (left) PetscCall(VecDuplicate(matctx->w[0],left));
     350           0 :   PetscFunctionReturn(PETSC_SUCCESS);
     351             : }
     352             : 
     353         156 : static PetscErrorCode MatDestroy_NEPDeflation(Mat M)
     354             : {
     355         156 :   NEP_DEF_MATSHELL *matctx;
     356             : 
     357         156 :   PetscFunctionBegin;
     358         156 :   PetscCall(MatShellGetContext(M,&matctx));
     359         156 :   if (matctx->extop->szd) {
     360          44 :     PetscCall(BVDestroy(&matctx->U));
     361          44 :     PetscCall(PetscFree2(matctx->hfj,matctx->work));
     362          44 :     PetscCall(PetscFree2(matctx->A,matctx->B));
     363          44 :     PetscCall(VecDestroy(&matctx->w[0]));
     364          44 :     PetscCall(VecDestroy(&matctx->w[1]));
     365             :   }
     366         156 :   if (matctx->P != matctx->T) PetscCall(MatDestroy(&matctx->P));
     367         156 :   PetscCall(MatDestroy(&matctx->T));
     368         156 :   PetscCall(PetscFree(matctx));
     369         156 :   PetscFunctionReturn(PETSC_SUCCESS);
     370             : }
     371             : 
     372         990 : static PetscErrorCode NEPDeflationEvaluateBasis(NEP_EXT_OP extop,PetscScalar lambda,PetscInt n,PetscScalar *val,PetscBool jacobian)
     373             : {
     374         990 :   PetscScalar p;
     375         990 :   PetscInt    i;
     376             : 
     377         990 :   PetscFunctionBegin;
     378         990 :   if (!jacobian) {
     379         571 :     val[0] = 1.0;
     380         809 :     for (i=1;i<extop->n;i++) val[i] = val[i-1]*(lambda-extop->bc[i-1]);
     381             :   } else {
     382         419 :     val[0] = 0.0;
     383         419 :     p = 1.0;
     384         580 :     for (i=1;i<extop->n;i++) {
     385         161 :       val[i] = val[i-1]*(lambda-extop->bc[i-1])+p;
     386         161 :       p *= (lambda-extop->bc[i-1]);
     387             :     }
     388             :   }
     389         990 :   PetscFunctionReturn(PETSC_SUCCESS);
     390             : }
     391             : 
     392        2927 : static PetscErrorCode NEPDeflationComputeShellMat(NEP_EXT_OP extop,PetscScalar lambda,PetscBool jacobian,Mat *M)
     393             : {
     394        2927 :   NEP_DEF_MATSHELL *matctx,*matctxC;
     395        2927 :   PetscInt         nloc,mloc,n=extop->n,j,i,szd=extop->szd,ldh=szd+1,k;
     396        2927 :   Mat              F,Mshell,Mcomp;
     397        2927 :   PetscBool        ini=PETSC_FALSE;
     398        2927 :   PetscScalar      *hf,*hfj,*hfjp,sone=1.0,*hH,*hHprev,*pts,*B,*A,*Hj=extop->Hj,*basisv,zero=0.0;
     399        2927 :   PetscBLASInt     n_,info,szd_;
     400             : 
     401        2927 :   PetscFunctionBegin;
     402        2927 :   if (!M) Mshell = jacobian?extop->MJ:extop->MF;
     403          97 :   else Mshell = *M;
     404        2927 :   Mcomp = jacobian?extop->MF:extop->MJ;
     405        2927 :   if (!Mshell) {
     406         156 :     ini = PETSC_TRUE;
     407         156 :     PetscCall(PetscNew(&matctx));
     408         156 :     PetscCall(MatGetLocalSize(extop->nep->function,&mloc,&nloc));
     409         156 :     nloc += szd; mloc += szd;
     410         156 :     PetscCall(MatCreateShell(PetscObjectComm((PetscObject)extop->nep),nloc,mloc,PETSC_DETERMINE,PETSC_DETERMINE,matctx,&Mshell));
     411         156 :     PetscCall(MatShellSetOperation(Mshell,MATOP_MULT,(void(*)(void))MatMult_NEPDeflation));
     412         156 :     PetscCall(MatShellSetOperation(Mshell,MATOP_CREATE_VECS,(void(*)(void))MatCreateVecs_NEPDeflation));
     413         156 :     PetscCall(MatShellSetOperation(Mshell,MATOP_DESTROY,(void(*)(void))MatDestroy_NEPDeflation));
     414         156 :     matctx->nep = extop->nep;
     415         156 :     matctx->extop = extop;
     416         156 :     if (!M) {
     417         115 :       if (jacobian) { matctx->jacob = PETSC_TRUE; matctx->T = extop->nep->jacobian; extop->MJ = Mshell; }
     418          66 :       else { matctx->jacob = PETSC_FALSE; matctx->T = extop->nep->function; extop->MF = Mshell; }
     419         115 :       PetscCall(PetscObjectReference((PetscObject)matctx->T));
     420         115 :       if (!jacobian) {
     421          66 :         if (extop->nep->function_pre && extop->nep->function_pre != extop->nep->function) {
     422           2 :           matctx->P = extop->nep->function_pre;
     423           2 :           PetscCall(PetscObjectReference((PetscObject)matctx->P));
     424          64 :         } else matctx->P = matctx->T;
     425             :       }
     426             :     } else {
     427          41 :       matctx->jacob = jacobian;
     428          41 :       PetscCall(MatDuplicate(jacobian?extop->nep->jacobian:extop->nep->function,MAT_DO_NOT_COPY_VALUES,&matctx->T));
     429          41 :       *M = Mshell;
     430          41 :       if (!jacobian) {
     431          41 :         if (extop->nep->function_pre && extop->nep->function_pre != extop->nep->function) PetscCall(MatDuplicate(extop->nep->function_pre,MAT_DO_NOT_COPY_VALUES,&matctx->P));
     432          41 :         else matctx->P = matctx->T;
     433             :       }
     434             :     }
     435         156 :     if (szd) {
     436          44 :       PetscCall(BVCreateVec(extop->nep->V,matctx->w));
     437          44 :       PetscCall(VecDuplicate(matctx->w[0],matctx->w+1));
     438          44 :       PetscCall(BVDuplicateResize(extop->nep->V,szd,&matctx->U));
     439          44 :       PetscCall(PetscMalloc2(extop->simpU?2*(szd)*(szd):2*(szd)*(szd)*extop->nep->nt,&matctx->hfj,szd,&matctx->work));
     440          44 :       PetscCall(PetscMalloc2(szd*szd,&matctx->A,szd*szd,&matctx->B));
     441             :     }
     442        2771 :   } else PetscCall(MatShellGetContext(Mshell,&matctx));
     443        2771 :   if (ini || matctx->theta != lambda || matctx->n != extop->n) {
     444        2238 :     if (ini || matctx->theta != lambda) {
     445        2344 :       if (jacobian) PetscCall(NEPComputeJacobian(extop->nep,lambda,matctx->T));
     446        1337 :       else PetscCall(NEPComputeFunction(extop->nep,lambda,matctx->T,matctx->P));
     447             :     }
     448        2350 :     if (n) {
     449         758 :       matctx->hfjset = PETSC_FALSE;
     450         758 :       if (!extop->simpU) {
     451             :         /* likely hfjp has been already computed */
     452         561 :         if (Mcomp) {
     453         524 :           PetscCall(MatShellGetContext(Mcomp,&matctxC));
     454         524 :           if (matctxC->hfjset && matctxC->theta == lambda && matctxC->n == extop->n) {
     455         241 :             PetscCall(PetscArraycpy(matctx->hfj,matctxC->hfj,2*extop->szd*extop->szd*extop->nep->nt));
     456         241 :             matctx->hfjset = PETSC_TRUE;
     457             :           }
     458             :         }
     459         561 :         hfj = matctx->hfj; hfjp = matctx->hfj+extop->szd*extop->szd*extop->nep->nt;
     460         561 :         if (!matctx->hfjset) {
     461         320 :           PetscCall(NEPDeflationEvaluateHatFunction(extop,-1,lambda,NULL,hfj,hfjp,n));
     462         320 :           matctx->hfjset = PETSC_TRUE;
     463             :         }
     464         561 :         PetscCall(BVSetActiveColumns(matctx->U,0,n));
     465         561 :         hf = jacobian?hfjp:hfj;
     466         561 :         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n,n,hf,&F));
     467         561 :         PetscCall(BVMatMult(extop->X,extop->nep->A[0],matctx->U));
     468         561 :         PetscCall(BVMultInPlace(matctx->U,F,0,n));
     469         561 :         PetscCall(BVSetActiveColumns(extop->W,0,extop->n));
     470        1678 :         for (j=1;j<extop->nep->nt;j++) {
     471        1117 :           PetscCall(BVMatMult(extop->X,extop->nep->A[j],extop->W));
     472        1117 :           PetscCall(MatDensePlaceArray(F,hf+j*n*n));
     473        1117 :           PetscCall(BVMult(matctx->U,1.0,1.0,extop->W,F));
     474        1117 :           PetscCall(MatDenseResetArray(F));
     475             :         }
     476         561 :         PetscCall(MatDestroy(&F));
     477             :       } else {
     478         197 :         hfj = matctx->hfj;
     479         197 :         PetscCall(BVSetActiveColumns(matctx->U,0,n));
     480         197 :         PetscCall(BVMatMult(extop->X,matctx->T,matctx->U));
     481         437 :         for (j=0;j<n;j++) {
     482         566 :           for (i=0;i<n;i++) hfj[j*n+i] = -extop->H[j*ldh+i];
     483         240 :           hfj[j*(n+1)] += lambda;
     484             :         }
     485         197 :         PetscCall(PetscBLASIntCast(n,&n_));
     486         197 :         PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
     487         197 :         PetscCallBLAS("LAPACKtrtri",LAPACKtrtri_("U","N",&n_,hfj,&n_,&info));
     488         197 :         PetscCall(PetscFPTrapPop());
     489         197 :         SlepcCheckLapackInfo("trtri",info);
     490         197 :         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n,n,hfj,&F));
     491         197 :         PetscCall(BVMultInPlace(matctx->U,F,0,n));
     492         197 :         if (jacobian) {
     493          83 :           PetscCall(NEPDeflationComputeFunction(extop,lambda,NULL));
     494          83 :           PetscCall(MatShellGetContext(extop->MF,&matctxC));
     495          83 :           PetscCall(BVMult(matctx->U,-1.0,1.0,matctxC->U,F));
     496             :         }
     497         197 :         PetscCall(MatDestroy(&F));
     498             :       }
     499         758 :       PetscCall(PetscCalloc3(n,&basisv,szd*szd,&hH,szd*szd,&hHprev));
     500         758 :       PetscCall(NEPDeflationEvaluateBasis(extop,lambda,n,basisv,jacobian));
     501         758 :       A = matctx->A;
     502         758 :       PetscCall(PetscArrayzero(A,szd*szd));
     503        1361 :       if (!jacobian) for (i=0;i<n;i++) A[i*(szd+1)] = 1.0;
     504        1798 :       for (j=0;j<n;j++)
     505        2664 :         for (i=0;i<n;i++)
     506        1624 :           for (k=1;k<extop->midx;k++) A[j*szd+i] += basisv[k]*PetscConj(Hj[k*szd*szd+i*szd+j]);
     507         758 :       PetscCall(PetscBLASIntCast(n,&n_));
     508         758 :       PetscCall(PetscBLASIntCast(szd,&szd_));
     509         758 :       B = matctx->B;
     510         758 :       PetscCall(PetscArrayzero(B,szd*szd));
     511         758 :       for (i=1;i<extop->midx;i++) {
     512           0 :         PetscCall(NEPDeflationEvaluateBasisMat(extop,i,PETSC_TRUE,basisv,hH,hHprev));
     513           0 :         PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->XpX,&szd_,hH,&szd_,&zero,hHprev,&szd_));
     514           0 :         PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&n_,&n_,&n_,&sone,extop->Hj+szd*szd*i,&szd_,hHprev,&szd_,&sone,B,&szd_));
     515           0 :         pts = hHprev; hHprev = hH; hH = pts;
     516             :       }
     517         758 :       PetscCall(PetscFree3(basisv,hH,hHprev));
     518             :     }
     519        2350 :     matctx->theta = lambda;
     520        2350 :     matctx->n = extop->n;
     521             :   }
     522        2927 :   PetscFunctionReturn(PETSC_SUCCESS);
     523             : }
     524             : 
     525        1732 : PetscErrorCode NEPDeflationComputeFunction(NEP_EXT_OP extop,PetscScalar lambda,Mat *F)
     526             : {
     527        1732 :   PetscFunctionBegin;
     528        1732 :   PetscCall(NEPDeflationComputeShellMat(extop,lambda,PETSC_FALSE,NULL));
     529        1732 :   if (F) *F = extop->MF;
     530        1732 :   PetscFunctionReturn(PETSC_SUCCESS);
     531             : }
     532             : 
     533        1007 : PetscErrorCode NEPDeflationComputeJacobian(NEP_EXT_OP extop,PetscScalar lambda,Mat *J)
     534             : {
     535        1007 :   PetscFunctionBegin;
     536        1007 :   PetscCall(NEPDeflationComputeShellMat(extop,lambda,PETSC_TRUE,NULL));
     537        1007 :   if (J) *J = extop->MJ;
     538        1007 :   PetscFunctionReturn(PETSC_SUCCESS);
     539             : }
     540             : 
     541         188 : PetscErrorCode NEPDeflationSolveSetUp(NEP_EXT_OP extop,PetscScalar lambda)
     542             : {
     543         188 :   NEP_DEF_MATSHELL  *matctx;
     544         188 :   NEP_DEF_FUN_SOLVE solve;
     545         188 :   PetscInt          i,j,n=extop->n;
     546         188 :   Vec               u,tu;
     547         188 :   Mat               F;
     548         188 :   PetscScalar       snone=-1.0,sone=1.0;
     549         188 :   PetscBLASInt      n_,szd_,ldh_,*p,info;
     550         188 :   Mat               Mshell;
     551             : 
     552         188 :   PetscFunctionBegin;
     553         188 :   solve = extop->solve;
     554         188 :   if (lambda!=solve->theta || n!=solve->n) {
     555         188 :     PetscCall(NEPDeflationComputeShellMat(extop,lambda,PETSC_FALSE,solve->sincf?NULL:&solve->T));
     556         188 :     Mshell = (solve->sincf)?extop->MF:solve->T;
     557         188 :     PetscCall(MatShellGetContext(Mshell,&matctx));
     558         188 :     PetscCall(NEP_KSPSetOperators(solve->ksp,matctx->T,matctx->P));
     559         188 :     if (!extop->ref && n) {
     560          70 :       PetscCall(PetscBLASIntCast(n,&n_));
     561          70 :       PetscCall(PetscBLASIntCast(extop->szd,&szd_));
     562          70 :       PetscCall(PetscBLASIntCast(extop->szd+1,&ldh_));
     563          70 :       if (!extop->simpU) {
     564          57 :         PetscCall(BVSetActiveColumns(solve->T_1U,0,n));
     565         147 :         for (i=0;i<n;i++) {
     566          90 :           PetscCall(BVGetColumn(matctx->U,i,&u));
     567          90 :           PetscCall(BVGetColumn(solve->T_1U,i,&tu));
     568          90 :           PetscCall(KSPSolve(solve->ksp,u,tu));
     569          90 :           PetscCall(BVRestoreColumn(solve->T_1U,i,&tu));
     570          90 :           PetscCall(BVRestoreColumn(matctx->U,i,&u));
     571             :         }
     572          57 :         PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,n,n,solve->work,&F));
     573          57 :         PetscCall(BVDot(solve->T_1U,extop->X,F));
     574          57 :         PetscCall(MatDestroy(&F));
     575             :       } else {
     576          31 :         for (j=0;j<n;j++)
     577          46 :           for (i=0;i<n;i++) solve->work[j*n+i] = extop->XpX[j*extop->szd+i];
     578          31 :         for (i=0;i<n;i++) extop->H[i*ldh_+i] -= lambda;
     579          13 :         PetscCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&n_,&n_,&snone,extop->H,&ldh_,solve->work,&n_));
     580          31 :         for (i=0;i<n;i++) extop->H[i*ldh_+i] += lambda;
     581             :       }
     582          70 :       PetscCall(PetscArraycpy(solve->M,matctx->B,extop->szd*extop->szd));
     583          70 :       PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&snone,matctx->A,&szd_,solve->work,&n_,&sone,solve->M,&szd_));
     584          70 :       PetscCall(PetscMalloc1(n,&p));
     585          70 :       PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
     586          70 :       PetscCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n_,&n_,solve->M,&szd_,p,&info));
     587          70 :       SlepcCheckLapackInfo("getrf",info);
     588          70 :       PetscCallBLAS("LAPACKgetri",LAPACKgetri_(&n_,solve->M,&szd_,p,solve->work,&n_,&info));
     589          70 :       SlepcCheckLapackInfo("getri",info);
     590          70 :       PetscCall(PetscFPTrapPop());
     591          70 :       PetscCall(PetscFree(p));
     592             :     }
     593         188 :     solve->theta = lambda;
     594         188 :     solve->n = n;
     595             :   }
     596         188 :   PetscFunctionReturn(PETSC_SUCCESS);
     597             : }
     598             : 
     599        3316 : PetscErrorCode NEPDeflationFunctionSolve(NEP_EXT_OP extop,Vec b,Vec x)
     600             : {
     601        3316 :   Vec               b1,x1;
     602        3316 :   PetscScalar       *xx,*bb,*x2,*b2,*w,*w2,snone=-1.0,sone=1.0,zero=0.0;
     603        3316 :   NEP_DEF_MATSHELL  *matctx;
     604        3316 :   NEP_DEF_FUN_SOLVE solve=extop->solve;
     605        3316 :   PetscBLASInt      one=1,szd_,n_,ldh_;
     606        3316 :   PetscInt          nloc,i;
     607        3316 :   PetscMPIInt       np,count;
     608             : 
     609        3316 :   PetscFunctionBegin;
     610        3316 :   if (extop->ref) PetscCall(VecZeroEntries(x));
     611        3316 :   if (extop->szd) {
     612        2163 :     x1 = solve->w[0]; b1 = solve->w[1];
     613        2163 :     PetscCall(VecGetArray(x,&xx));
     614        2163 :     PetscCall(VecPlaceArray(x1,xx));
     615        2163 :     PetscCall(VecGetArray(b,&bb));
     616        2163 :     PetscCall(VecPlaceArray(b1,bb));
     617             :   } else {
     618             :     b1 = b; x1 = x;
     619             :   }
     620        3316 :   PetscCall(KSPSolve(extop->solve->ksp,b1,x1));
     621        3316 :   if (!extop->ref && extop->n && extop->szd) {
     622        1136 :     PetscCall(PetscBLASIntCast(extop->szd,&szd_));
     623        1136 :     PetscCall(PetscBLASIntCast(extop->n,&n_));
     624        1136 :     PetscCall(PetscBLASIntCast(extop->szd+1,&ldh_));
     625        1136 :     PetscCall(BVGetSizes(extop->nep->V,&nloc,NULL,NULL));
     626        1136 :     PetscCall(PetscMalloc2(extop->n,&b2,extop->n,&x2));
     627        1136 :     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)b),&np));
     628        2717 :     for (i=0;i<extop->n;i++) b2[i] = bb[nloc+i]*PetscSqrtReal(np);
     629        1136 :     w = solve->work; w2 = solve->work+extop->n;
     630        1136 :     PetscCall(MatShellGetContext(solve->sincf?extop->MF:solve->T,&matctx));
     631        1136 :     PetscCall(PetscArraycpy(w2,b2,extop->n));
     632        1136 :     PetscCall(BVDotVec(extop->X,x1,w));
     633        1136 :     PetscCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&snone,matctx->A,&szd_,w,&one,&sone,w2,&one));
     634        1136 :     PetscCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,solve->M,&szd_,w2,&one,&zero,x2,&one));
     635        1136 :     if (extop->simpU) {
     636         577 :       for (i=0;i<extop->n;i++) extop->H[i+i*(extop->szd+1)] -= solve->theta;
     637         577 :       for (i=0;i<extop->n;i++) w[i] = x2[i];
     638         260 :       PetscCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&one,&snone,extop->H,&ldh_,w,&n_));
     639         577 :       for (i=0;i<extop->n;i++) extop->H[i+i*(extop->szd+1)] += solve->theta;
     640         260 :       PetscCall(BVMultVec(extop->X,-1.0,1.0,x1,w));
     641         876 :     } else PetscCall(BVMultVec(solve->T_1U,-1.0,1.0,x1,x2));
     642        2717 :     for (i=0;i<extop->n;i++) xx[i+nloc] = x2[i]/PetscSqrtReal(np);
     643        1136 :     PetscCall(PetscMPIIntCast(extop->n,&count));
     644        2272 :     PetscCallMPI(MPI_Bcast(xx+nloc,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)b)));
     645             :   }
     646        3316 :   if (extop->szd) {
     647        2163 :     PetscCall(VecResetArray(x1));
     648        2163 :     PetscCall(VecRestoreArray(x,&xx));
     649        2163 :     PetscCall(VecResetArray(b1));
     650        2163 :     PetscCall(VecRestoreArray(b,&bb));
     651        2163 :     if (!extop->ref && extop->n) PetscCall(PetscFree2(b2,x2));
     652             :   }
     653        3316 :   PetscFunctionReturn(PETSC_SUCCESS);
     654             : }
     655             : 
     656         102 : PetscErrorCode NEPDeflationSetRefine(NEP_EXT_OP extop,PetscBool ref)
     657             : {
     658         102 :   PetscFunctionBegin;
     659         102 :   extop->ref = ref;
     660         102 :   PetscFunctionReturn(PETSC_SUCCESS);
     661             : }
     662             : 
     663         132 : PetscErrorCode NEPDeflationReset(NEP_EXT_OP extop)
     664             : {
     665         132 :   PetscInt          j;
     666         132 :   NEP_DEF_FUN_SOLVE solve;
     667             : 
     668         132 :   PetscFunctionBegin;
     669         132 :   if (!extop) PetscFunctionReturn(PETSC_SUCCESS);
     670          66 :   PetscCall(PetscFree(extop->H));
     671          66 :   PetscCall(BVDestroy(&extop->X));
     672          66 :   if (extop->szd) {
     673          19 :     PetscCall(VecDestroy(&extop->w));
     674          19 :     PetscCall(PetscFree3(extop->Hj,extop->XpX,extop->bc));
     675          19 :     PetscCall(BVDestroy(&extop->W));
     676             :   }
     677          66 :   PetscCall(MatDestroy(&extop->MF));
     678          66 :   PetscCall(MatDestroy(&extop->MJ));
     679          66 :   if (extop->solve) {
     680          66 :     solve = extop->solve;
     681          66 :     if (extop->szd) {
     682          19 :       if (!extop->simpU) PetscCall(BVDestroy(&solve->T_1U));
     683          19 :       PetscCall(PetscFree2(solve->M,solve->work));
     684          19 :       PetscCall(VecDestroy(&solve->w[0]));
     685          19 :       PetscCall(VecDestroy(&solve->w[1]));
     686             :     }
     687          66 :     if (!solve->sincf) PetscCall(MatDestroy(&solve->T));
     688          66 :     PetscCall(PetscFree(extop->solve));
     689             :   }
     690          66 :   if (extop->proj) {
     691          17 :     if (extop->szd) {
     692          12 :       for (j=0;j<extop->nep->nt;j++) PetscCall(MatDestroy(&extop->proj->V1pApX[j]));
     693           3 :       PetscCall(MatDestroy(&extop->proj->XpV1));
     694           3 :       PetscCall(PetscFree3(extop->proj->V2,extop->proj->V1pApX,extop->proj->work));
     695           3 :       PetscCall(VecDestroy(&extop->proj->w));
     696           3 :       PetscCall(BVDestroy(&extop->proj->V1));
     697             :     }
     698          17 :     PetscCall(PetscFree(extop->proj));
     699             :   }
     700          66 :   PetscCall(PetscFree(extop));
     701          66 :   PetscFunctionReturn(PETSC_SUCCESS);
     702             : }
     703             : 
     704          66 : PetscErrorCode NEPDeflationInitialize(NEP nep,BV X,KSP ksp,PetscBool sincfun,PetscInt sz,NEP_EXT_OP *extop)
     705             : {
     706          66 :   NEP_EXT_OP        op;
     707          66 :   NEP_DEF_FUN_SOLVE solve;
     708          66 :   PetscInt          szd;
     709          66 :   Vec               x;
     710             : 
     711          66 :   PetscFunctionBegin;
     712          66 :   PetscCall(NEPDeflationReset(*extop));
     713          66 :   PetscCall(PetscNew(&op));
     714          66 :   *extop  = op;
     715          66 :   op->nep = nep;
     716          66 :   op->n   = 0;
     717          66 :   op->szd = szd = sz-1;
     718          66 :   op->max_midx = PetscMin(MAX_MINIDX,szd);
     719          66 :   op->X = X;
     720          66 :   if (!X) PetscCall(BVDuplicateResize(nep->V,sz,&op->X));
     721          66 :   else PetscCall(PetscObjectReference((PetscObject)X));
     722          66 :   PetscCall(PetscCalloc1(sz*sz,&(op)->H));
     723          66 :   if (op->szd) {
     724          19 :     PetscCall(BVGetColumn(op->X,0,&x));
     725          19 :     PetscCall(VecDuplicate(x,&op->w));
     726          19 :     PetscCall(BVRestoreColumn(op->X,0,&x));
     727          19 :     op->simpU = PETSC_FALSE;
     728          19 :     if (nep->fui==NEP_USER_INTERFACE_SPLIT) {
     729             :       /* undocumented option to use the simple expression for U = T*X*inv(lambda-H) */
     730          17 :       PetscCall(PetscOptionsGetBool(NULL,NULL,"-nep_deflation_simpleu",&op->simpU,NULL));
     731             :     } else {
     732           2 :       op->simpU = PETSC_TRUE;
     733             :     }
     734          19 :     PetscCall(PetscCalloc3(szd*szd*op->max_midx,&(op)->Hj,szd*szd,&(op)->XpX,szd,&op->bc));
     735          19 :     PetscCall(BVDuplicateResize(op->X,op->szd,&op->W));
     736             :   }
     737          66 :   if (ksp) {
     738          66 :     PetscCall(PetscNew(&solve));
     739          66 :     op->solve    = solve;
     740          66 :     solve->ksp   = ksp;
     741          66 :     solve->sincf = sincfun;
     742          66 :     solve->n     = -1;
     743          66 :     if (op->szd) {
     744          19 :       if (!op->simpU) PetscCall(BVDuplicateResize(nep->V,szd,&solve->T_1U));
     745          19 :       PetscCall(PetscMalloc2(szd*szd,&solve->M,2*szd*szd,&solve->work));
     746          19 :       PetscCall(BVCreateVec(nep->V,&solve->w[0]));
     747          19 :       PetscCall(VecDuplicate(solve->w[0],&solve->w[1]));
     748             :     }
     749             :   }
     750          66 :   PetscFunctionReturn(PETSC_SUCCESS);
     751             : }
     752             : 
     753         850 : static PetscErrorCode NEPDeflationDSNEPComputeMatrix(DS ds,PetscScalar lambda,PetscBool deriv,DSMatType mat,void *ctx)
     754             : {
     755         850 :   Mat               A,Ei;
     756         850 :   PetscScalar       *T,*w1,*w2,*w=NULL,*ww,*hH,*hHprev,*pts;
     757         850 :   PetscScalar       alpha,alpha2,*AB,sone=1.0,zero=0.0,*basisv,s;
     758         850 :   const PetscScalar *E;
     759         850 :   PetscInt          i,ldds,nwork=0,szd,nv,j,k,n;
     760         850 :   PetscBLASInt      inc=1,nv_,ldds_,dim_,szdk,szd_,n_,ldh_;
     761         850 :   PetscMPIInt       np;
     762         850 :   NEP_DEF_PROJECT   proj=(NEP_DEF_PROJECT)ctx;
     763         850 :   NEP_EXT_OP        extop=proj->extop;
     764         850 :   NEP               nep=extop->nep;
     765             : 
     766         850 :   PetscFunctionBegin;
     767         850 :   PetscCall(DSGetDimensions(ds,&nv,NULL,NULL,NULL));
     768         850 :   PetscCall(DSGetLeadingDimension(ds,&ldds));
     769         850 :   PetscCall(DSGetMat(ds,mat,&A));
     770         850 :   PetscCall(MatZeroEntries(A));
     771             :   /* mat = V1^*T(lambda)V1 */
     772        3400 :   for (i=0;i<nep->nt;i++) {
     773        2550 :     if (deriv) PetscCall(FNEvaluateDerivative(nep->f[i],lambda,&alpha));
     774        1515 :     else PetscCall(FNEvaluateFunction(nep->f[i],lambda,&alpha));
     775        2550 :     PetscCall(DSGetMat(ds,DSMatExtra[i],&Ei));
     776        2550 :     PetscCall(MatAXPY(A,alpha,Ei,SAME_NONZERO_PATTERN));
     777        2550 :     PetscCall(DSRestoreMat(ds,DSMatExtra[i],&Ei));
     778             :   }
     779         850 :   PetscCall(DSRestoreMat(ds,mat,&A));
     780         850 :   if (!extop->ref && extop->n) {
     781         232 :     PetscCall(DSGetArray(ds,mat,&T));
     782         232 :     n = extop->n;
     783         232 :     szd = extop->szd;
     784         232 :     PetscCall(PetscArrayzero(proj->work,proj->lwork));
     785         232 :     PetscCall(PetscBLASIntCast(nv,&nv_));
     786         232 :     PetscCall(PetscBLASIntCast(n,&n_));
     787         232 :     PetscCall(PetscBLASIntCast(ldds,&ldds_));
     788         232 :     PetscCall(PetscBLASIntCast(szd,&szd_));
     789         232 :     PetscCall(PetscBLASIntCast(proj->dim,&dim_));
     790         232 :     PetscCall(PetscBLASIntCast(extop->szd+1,&ldh_));
     791         232 :     w1 = proj->work; w2 = proj->work+proj->dim*proj->dim;
     792         232 :     nwork += 2*proj->dim*proj->dim;
     793             : 
     794             :     /* mat = mat + V1^*U(lambda)V2 */
     795         928 :     for (i=0;i<nep->nt;i++) {
     796         696 :       if (extop->simpU) {
     797         225 :         if (deriv) PetscCall(FNEvaluateDerivative(nep->f[i],lambda,&alpha));
     798         135 :         else PetscCall(FNEvaluateFunction(nep->f[i],lambda,&alpha));
     799         225 :         ww = w1; w = w2;
     800         225 :         PetscCall(PetscArraycpy(ww,proj->V2,szd*nv));
     801         225 :         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)ds),&np));
     802        2145 :         for (j=0;j<szd*nv;j++) ww[j] *= PetscSqrtReal(np);
     803         555 :         for (j=0;j<n;j++) extop->H[j*ldh_+j] -= lambda;
     804         225 :         alpha = -alpha;
     805         225 :         PetscCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha,extop->H,&ldh_,ww,&szd_));
     806         225 :         if (deriv) {
     807          90 :           PetscCall(PetscBLASIntCast(szd*nv,&szdk));
     808          90 :           PetscCall(FNEvaluateFunction(nep->f[i],lambda,&alpha2));
     809          90 :           PetscCall(PetscArraycpy(w,proj->V2,szd*nv));
     810         858 :           for (j=0;j<szd*nv;j++) w[j] *= PetscSqrtReal(np);
     811          90 :           alpha2 = -alpha2;
     812          90 :           PetscCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha2,extop->H,&ldh_,w,&szd_));
     813          90 :           alpha2 = 1.0;
     814          90 :           PetscCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha2,extop->H,&ldh_,w,&szd_));
     815          90 :           PetscCallBLAS("BLASaxpy",BLASaxpy_(&szdk,&sone,w,&inc,ww,&inc));
     816             :         }
     817         555 :         for (j=0;j<n;j++) extop->H[j*ldh_+j] += lambda;
     818             :       } else {
     819         471 :         PetscCall(NEPDeflationEvaluateHatFunction(extop,i,lambda,NULL,w1,w2,szd));
     820         471 :         w = deriv?w2:w1; ww = deriv?w1:w2;
     821         471 :         PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)ds),&np));
     822         471 :         s = PetscSqrtReal(np);
     823         471 :         PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&s,w,&szd_,proj->V2,&szd_,&zero,ww,&szd_));
     824             :       }
     825         696 :       PetscCall(MatDenseGetArrayRead(proj->V1pApX[i],&E));
     826         696 :       PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&nv_,&nv_,&n_,&sone,E,&dim_,ww,&szd_,&sone,T,&ldds_));
     827         696 :       PetscCall(MatDenseRestoreArrayRead(proj->V1pApX[i],&E));
     828             :     }
     829             : 
     830             :     /* mat = mat + V2^*A(lambda)V1 */
     831         232 :     basisv = proj->work+nwork; nwork += szd;
     832         232 :     hH     = proj->work+nwork; nwork += szd*szd;
     833         232 :     hHprev = proj->work+nwork; nwork += szd*szd;
     834         232 :     AB     = proj->work+nwork;
     835         232 :     PetscCall(NEPDeflationEvaluateBasis(extop,lambda,n,basisv,deriv));
     836         438 :     if (!deriv) for (i=0;i<n;i++) AB[i*(szd+1)] = 1.0;
     837         581 :     for (j=0;j<n;j++)
     838         932 :       for (i=0;i<n;i++)
     839         583 :         for (k=1;k<extop->midx;k++) AB[j*szd+i] += basisv[k]*PetscConj(extop->Hj[k*szd*szd+i*szd+j]);
     840         232 :     PetscCall(MatDenseGetArrayRead(proj->XpV1,&E));
     841         232 :     PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&sone,AB,&szd_,E,&szd_,&zero,w,&szd_));
     842         232 :     PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&nv_,&nv_,&n_,&sone,proj->V2,&szd_,w,&szd_,&sone,T,&ldds_));
     843         232 :     PetscCall(MatDenseRestoreArrayRead(proj->XpV1,&E));
     844             : 
     845             :     /* mat = mat + V2^*B(lambda)V2 */
     846         232 :     PetscCall(PetscArrayzero(AB,szd*szd));
     847         232 :     for (i=1;i<extop->midx;i++) {
     848           0 :       PetscCall(NEPDeflationEvaluateBasisMat(extop,i,PETSC_TRUE,basisv,hH,hHprev));
     849           0 :       PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->XpX,&szd_,hH,&szd_,&zero,hHprev,&szd_));
     850           0 :       PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&n_,&n_,&n_,&sone,extop->Hj+szd*szd*i,&szd_,hHprev,&szd_,&sone,AB,&szd_));
     851           0 :       pts = hHprev; hHprev = hH; hH = pts;
     852             :     }
     853         232 :     PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&sone,AB,&szd_,proj->V2,&szd_,&zero,w,&szd_));
     854         232 :     PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&nv_,&nv_,&n_,&sone,proj->V2,&szd_,w,&szd_,&sone,T,&ldds_));
     855         232 :     PetscCall(DSRestoreArray(ds,mat,&T));
     856             :   }
     857         850 :   PetscFunctionReturn(PETSC_SUCCESS);
     858             : }
     859             : 
     860         160 : PetscErrorCode NEPDeflationProjectOperator(NEP_EXT_OP extop,BV Vext,DS ds,PetscInt j0,PetscInt j1)
     861             : {
     862         160 :   PetscInt        k,j,n=extop->n,dim;
     863         160 :   Vec             v,ve;
     864         160 :   BV              V1;
     865         160 :   Mat             G;
     866         160 :   NEP             nep=extop->nep;
     867         160 :   NEP_DEF_PROJECT proj;
     868             : 
     869         160 :   PetscFunctionBegin;
     870         160 :   NEPCheckSplit(extop->nep,1);
     871         160 :   proj = extop->proj;
     872         160 :   if (!proj) {
     873             :     /* Initialize the projection data structure */
     874          17 :     PetscCall(PetscNew(&proj));
     875          17 :     extop->proj = proj;
     876          17 :     proj->extop = extop;
     877          17 :     PetscCall(BVGetSizes(Vext,NULL,NULL,&dim));
     878          17 :     proj->dim = dim;
     879          17 :     if (extop->szd) {
     880           3 :       proj->lwork = 3*dim*dim+2*extop->szd*extop->szd+extop->szd;
     881           3 :       PetscCall(PetscMalloc3(dim*extop->szd,&proj->V2,nep->nt,&proj->V1pApX,proj->lwork,&proj->work));
     882          12 :       for (j=0;j<nep->nt;j++) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,proj->dim,extop->szd,NULL,&proj->V1pApX[j]));
     883           3 :       PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,extop->szd,proj->dim,NULL,&proj->XpV1));
     884           3 :       PetscCall(BVCreateVec(extop->X,&proj->w));
     885           3 :       PetscCall(BVDuplicateResize(extop->X,proj->dim,&proj->V1));
     886             :     }
     887          17 :     PetscCall(DSNEPSetComputeMatrixFunction(ds,NEPDeflationDSNEPComputeMatrix,(void*)proj));
     888             :   }
     889             : 
     890             :   /* Split Vext in V1 and V2 */
     891         160 :   if (extop->szd) {
     892         122 :     for (j=j0;j<j1;j++) {
     893          61 :       PetscCall(BVGetColumn(Vext,j,&ve));
     894          61 :       PetscCall(BVGetColumn(proj->V1,j,&v));
     895          61 :       PetscCall(NEPDeflationCopyToExtendedVec(extop,v,proj->V2+j*extop->szd,ve,PETSC_TRUE));
     896          61 :       PetscCall(BVRestoreColumn(proj->V1,j,&v));
     897          61 :       PetscCall(BVRestoreColumn(Vext,j,&ve));
     898             :     }
     899          61 :     V1 = proj->V1;
     900             :   } else V1 = Vext;
     901             : 
     902             :   /* Compute matrices V1^* A_i V1 */
     903         160 :   PetscCall(BVSetActiveColumns(V1,j0,j1));
     904         640 :   for (k=0;k<nep->nt;k++) {
     905         480 :     PetscCall(DSGetMat(ds,DSMatExtra[k],&G));
     906         480 :     PetscCall(BVMatProject(V1,nep->A[k],V1,G));
     907         480 :     PetscCall(DSRestoreMat(ds,DSMatExtra[k],&G));
     908             :   }
     909             : 
     910         160 :   if (extop->n) {
     911          42 :     if (extop->szd) {
     912             :       /* Compute matrices V1^* A_i X  and V1^* X */
     913          42 :       PetscCall(BVSetActiveColumns(extop->W,0,n));
     914         168 :       for (k=0;k<nep->nt;k++) {
     915         126 :         PetscCall(BVMatMult(extop->X,nep->A[k],extop->W));
     916         126 :         PetscCall(BVDot(extop->W,V1,proj->V1pApX[k]));
     917             :       }
     918          42 :       PetscCall(BVDot(V1,extop->X,proj->XpV1));
     919             :     }
     920             :   }
     921         160 :   PetscFunctionReturn(PETSC_SUCCESS);
     922             : }

Generated by: LCOV version 1.14