LCOV - code coverage report
Current view: top level - sys/classes/st/impls/filter - filtlan.c (source / functions) Hit Total Coverage
Test: SLEPc Lines: 526 550 95.6 %
Date: 2024-11-23 00:34:26 Functions: 15 15 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
       3             :    SLEPc - Scalable Library for Eigenvalue Problem Computations
       4             :    Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
       5             : 
       6             :    This file is part of SLEPc.
       7             :    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
       8             :    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
       9             : */
      10             : /*
      11             :    This file is an adaptation of several subroutines from FILTLAN, the
      12             :    Filtered Lanczos Package, authored by Haw-ren Fang and Yousef Saad.
      13             : 
      14             :    More information at:
      15             :    https://www-users.cs.umn.edu/~saad/software/filtlan
      16             : 
      17             :    References:
      18             : 
      19             :        [1] H. Fang and Y. Saad, "A filtered Lanczos procedure for extreme and interior
      20             :            eigenvalue problems", SIAM J. Sci. Comput. 34(4):A2220-A2246, 2012.
      21             : */
      22             : 
      23             : #include <slepc/private/stimpl.h>
      24             : #include "filter.h"
      25             : 
      26             : static PetscErrorCode FILTLAN_FilteredConjugateResidualPolynomial(PetscReal*,PetscReal*,PetscInt,PetscReal*,PetscInt,PetscReal*,PetscInt);
      27             : static PetscReal FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(PetscReal*,PetscInt,PetscReal*,PetscInt,PetscReal);
      28             : static PetscErrorCode FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(PetscInt,PetscReal,PetscReal,PetscReal*,PetscReal*,PetscReal*,PetscReal*);
      29             : 
      30             : /* ////////////////////////////////////////////////////////////////////////////
      31             :    //    Newton - Hermite Polynomial Interpolation
      32             :    //////////////////////////////////////////////////////////////////////////// */
      33             : 
      34             : /*
      35             :    FILTLAN function NewtonPolynomial
      36             : 
      37             :    build P(z) by Newton's divided differences in the form
      38             :        P(z) = a(1) + a(2)*(z-x(1)) + a(3)*(z-x(1))*(z-x(2)) + ... + a(n)*(z-x(1))*...*(z-x(n-1)),
      39             :    such that P(x(i)) = y(i) for i=1,...,n, where
      40             :        x,y are input vectors of length n, and a is the output vector of length n
      41             :    if x(i)==x(j) for some i!=j, then it is assumed that the derivative of P(z) is to be zero at x(i),
      42             :        and the Hermite polynomial interpolation is applied
      43             :    in general, if there are k x(i)'s with the same value x0, then
      44             :        the j-th order derivative of P(z) is zero at z=x0 for j=1,...,k-1
      45             : */
      46        1780 : static inline PetscErrorCode FILTLAN_NewtonPolynomial(PetscInt n,PetscReal *x,PetscReal *y,PetscReal *sa,PetscReal *sf)
      47             : {
      48        1780 :   PetscReal      d,*sx=x,*sy=y;
      49        1780 :   PetscInt       j,k;
      50             : 
      51        1780 :   PetscFunctionBegin;
      52        1780 :   PetscCall(PetscArraycpy(sf,sy,n));
      53             : 
      54             :   /* apply Newton's finite difference method */
      55        1780 :   sa[0] = sf[0];
      56       39160 :   for (j=1;j<n;j++) {
      57      448560 :     for (k=n-1;k>=j;k--) {
      58      411180 :       d = sx[k]-sx[k-j];
      59      411180 :       if (PetscUnlikely(d == 0.0)) sf[k] = 0.0;  /* assume that the derivative is 0.0 and apply the Hermite interpolation */
      60      198198 :       else sf[k] = (sf[k]-sf[k-1]) / d;
      61             :     }
      62       37380 :     sa[j] = sf[j];
      63             :   }
      64        1780 :   PetscFunctionReturn(PETSC_SUCCESS);
      65             : }
      66             : 
      67             : /*
      68             :    FILTLAN function HermiteBaseFilterInChebyshevBasis
      69             : 
      70             :    compute a base filter P(z) which is a continuous, piecewise polynomial P(z) expanded
      71             :    in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials in each interval
      72             : 
      73             :    The base filter P(z) equals P_j(z) for z in the j-th interval [intv(j), intv(j+1)), where
      74             :    P_j(z) a Hermite interpolating polynomial
      75             : 
      76             :    input:
      77             :    intv is a vector which defines the intervals; the j-th interval is [intv(j), intv(j+1))
      78             :    HiLowFlags determines the shape of the base filter P(z)
      79             :    Consider the j-th interval [intv(j), intv(j+1)]
      80             :    HighLowFlag[j-1]==1,  P(z)==1 for z in [intv(j), intv(j+1)]
      81             :                    ==0,  P(z)==0 for z in [intv(j), intv(j+1)]
      82             :                    ==-1, [intv(j), intv(j+1)] is a transition interval;
      83             :                          P(intv(j)) and P(intv(j+1)) are defined such that P(z) is continuous
      84             :    baseDeg is the degree of smoothness of the Hermite (piecewise polynomial) interpolation
      85             :    to be precise, the i-th derivative of P(z) is zero, i.e. d^{i}P(z)/dz^i==0, at all interval
      86             :    end points z=intv(j) for i=1,...,baseDeg
      87             : 
      88             :    output:
      89             :    P(z) expanded in a basis of `translated' (scale-and-shift) Chebyshev polynomials
      90             :    to be precise, for z in the j-th interval [intv(j),intv(j+1)), P(z) equals
      91             :        P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
      92             :    where S_i(z) is the `translated' Chebyshev polynomial in that interval,
      93             :        S_i((z-c)/h) = T_i(z),  c = (intv(j)+intv(j+1))) / 2,  h = (intv(j+1)-intv(j)) / 2,
      94             :    with T_i(z) the Chebyshev polynomial of the first kind,
      95             :        T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
      96             :    the return matrix is the matrix of Chebyshev coefficients pp just described
      97             : 
      98             :    note that the degree of P(z) in each interval is (at most) 2*baseDeg+1, with 2*baseDeg+2 coefficients
      99             :    let n be the length of intv; then there are n-1 intervals
     100             :    therefore the return matrix pp is of size (2*baseDeg+2)-by-(n-1)
     101             : */
     102         890 : static PetscErrorCode FILTLAN_HermiteBaseFilterInChebyshevBasis(PetscReal *baseFilter,PetscReal *intv,PetscInt npoints,const PetscInt *HighLowFlags,PetscInt baseDeg)
     103             : {
     104         890 :   PetscInt       m,ii,jj;
     105         890 :   PetscReal      flag,flag0,flag2,aa,bb,*px,*py,*sx,*sy,*pp,*qq,*sq,*sbf,*work,*currentPoint = intv;
     106         890 :   const PetscInt *hilo = HighLowFlags;
     107             : 
     108         890 :   PetscFunctionBegin;
     109         890 :   m = 2*baseDeg+2;
     110         890 :   jj = npoints-1;  /* jj is initialized as the number of intervals */
     111         890 :   PetscCall(PetscMalloc5(m,&px,m,&py,m,&pp,m,&qq,m,&work));
     112             :   sbf = baseFilter;
     113             : 
     114        5340 :   while (jj--) {  /* the main loop to compute the Chebyshev coefficients */
     115             : 
     116        4450 :     flag = (PetscReal)(*hilo++);  /* get the flag of the current interval */
     117        4450 :     if (flag == -1.0) {  /* flag == -1 means that the current interval is a transition polynomial */
     118             : 
     119        1780 :       flag2 = (PetscReal)(*hilo);  /* get flag2, the flag of the next interval */
     120        1780 :       flag0 = 1.0-flag2;       /* the flag of the previous interval is 1-flag2 */
     121             : 
     122             :       /* two pointers for traversing x[] and y[] */
     123        1780 :       sx = px;
     124        1780 :       sy = py;
     125             : 
     126             :       /* find the current interval [aa,bb] */
     127        1780 :       aa = *currentPoint++;
     128        1780 :       bb = *currentPoint;
     129             : 
     130             :       /* now left-hand side */
     131        1780 :       ii = baseDeg+1;
     132       21360 :       while (ii--) {
     133       19580 :         *sy++ = flag0;
     134       19580 :         *sx++ = aa;
     135             :       }
     136             : 
     137             :       /* now right-hand side */
     138             :       ii = baseDeg+1;
     139       21360 :       while (ii--) {
     140       19580 :         *sy++ = flag2;
     141       19580 :         *sx++ = bb;
     142             :       }
     143             : 
     144             :       /* build a Newton polynomial (indeed, the generalized Hermite interpolating polynomial) with x[] and y[] */
     145        1780 :       PetscCall(FILTLAN_NewtonPolynomial(m,px,py,pp,work));
     146             : 
     147             :       /* pp contains coefficients of the Newton polynomial P(z) in the current interval [aa,bb], where
     148             :          P(z) = pp(1) + pp(2)*(z-px(1)) + pp(3)*(z-px(1))*(z-px(2)) + ... + pp(n)*(z-px(1))*...*(z-px(n-1)) */
     149             : 
     150             :       /* translate the Newton coefficients to the Chebyshev coefficients */
     151        1780 :       PetscCall(FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(m,aa,bb,pp,px,qq,work));
     152             :       /* qq contains coefficients of the polynomial in [aa,bb] in the `translated' Chebyshev basis */
     153             : 
     154             :       /* copy the Chebyshev coefficients to baseFilter
     155             :          OCTAVE/MATLAB: B(:,j) = qq, where j = (npoints-1)-jj and B is the return matrix */
     156        1780 :       sq = qq;
     157        1780 :       ii = 2*baseDeg+2;
     158       40940 :       while (ii--) *sbf++ = *sq++;
     159             : 
     160             :     } else {
     161             : 
     162             :       /* a constant polynomial P(z)=flag, where either flag==0 or flag==1
     163             :        OCTAVE/MATLAB: B(1,j) = flag, where j = (npoints-1)-jj and B is the return matrix */
     164        2670 :       *sbf++ = flag;
     165             : 
     166             :       /* the other coefficients are all zero, since P(z) is a constant
     167             :        OCTAVE/MATLAB: B(1,j) = 0, where j = (npoints-1)-jj and B is the return matrix */
     168        2670 :       ii = 2*baseDeg+1;
     169       58740 :       while (ii--) *sbf++ = 0.0;
     170             : 
     171             :       /* for the next point */
     172        2670 :       currentPoint++;
     173             :     }
     174             :   }
     175         890 :   PetscCall(PetscFree5(px,py,pp,qq,work));
     176         890 :   PetscFunctionReturn(PETSC_SUCCESS);
     177             : }
     178             : 
     179             : /* ////////////////////////////////////////////////////////////////////////////
     180             :    //    Base Filter
     181             :    //////////////////////////////////////////////////////////////////////////// */
     182             : 
     183             : /*
     184             :    FILTLAN function GetIntervals
     185             : 
     186             :    this routine determines the intervals (including the transition one(s)) by an iterative process
     187             : 
     188             :    frame is a vector consisting of 4 ordered elements:
     189             :        [frame(1),frame(4)] is the interval which (tightly) contains all eigenvalues, and
     190             :        [frame(2),frame(3)] is the interval in which the eigenvalues are sought
     191             :    baseDeg is the left-and-right degree of the base filter for each interval
     192             :    polyDeg is the (maximum possible) degree of s(z), with z*s(z) the polynomial filter
     193             :    intv is the output; the j-th interval is [intv(j),intv(j+1))
     194             :    opts is a collection of interval options
     195             : 
     196             :    the base filter P(z) is a piecewise polynomial from Hermite interpolation with degree baseDeg
     197             :    at each end point of intervals
     198             : 
     199             :    the polynomial filter Q(z) is in the form z*s(z), i.e. Q(0)==0, such that ||(1-P(z))-z*s(z)||_w is
     200             :    minimized with s(z) a polynomial of degree up to polyDeg
     201             : 
     202             :    the resulting polynomial filter Q(z) satisfies Q(x)>=Q(y) for x in [frame[1],frame[2]], and
     203             :    y in [frame[0],frame[3]] but not in [frame[1],frame[2]]
     204             : 
     205             :    the routine fills a PolynomialFilterInfo struct which gives some information of the polynomial filter
     206             : */
     207           7 : static PetscErrorCode FILTLAN_GetIntervals(PetscReal *intervals,PetscReal *frame,PetscInt polyDeg,PetscInt baseDeg,FILTLAN_IOP opts,FILTLAN_PFI filterInfo)
     208             : {
     209           7 :   PetscReal       intv[6],x,y,z1,z2,c,c1,c2,fc,fc2,halfPlateau,leftDelta,rightDelta,gridSize;
     210           7 :   PetscReal       yLimit,ySummit,yLeftLimit,yRightLimit,bottom,qIndex,*baseFilter,*polyFilter;
     211           7 :   PetscReal       yLimitGap=0.0,yLeftSummit=0.0,yLeftBottom=0.0,yRightSummit=0.0,yRightBottom=0.0;
     212           7 :   PetscInt        i,ii,npoints,numIter,numLeftSteps=1,numRightSteps=1,numMoreLooked=0;
     213           7 :   PetscBool       leftBoundaryMet=PETSC_FALSE,rightBoundaryMet=PETSC_FALSE,stepLeft,stepRight;
     214           7 :   const PetscReal a=frame[0],a1=frame[1],b1=frame[2],b=frame[3];
     215           7 :   const PetscInt  HighLowFlags[5] = { 1, -1, 0, -1, 1 };  /* if filterType is 1, only first 3 elements will be used */
     216           7 :   const PetscInt  numLookMore = 2*(PetscInt)(0.5+(PetscLogReal(2.0)/PetscLogReal(opts->shiftStepExpanRate)));
     217             : 
     218           7 :   PetscFunctionBegin;
     219           7 :   PetscCheck(a<=a1 && a1<=b1 && b1<=b,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Values in the frame vector should be non-decreasing");
     220           7 :   PetscCheck(a1!=b1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"The range of wanted eigenvalues cannot be of size zero");
     221           7 :   filterInfo->filterType = 2;      /* mid-pass filter, for interior eigenvalues */
     222           7 :   if (b == b1) {
     223           0 :     PetscCheck(a!=a1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"A polynomial filter should not cover all eigenvalues");
     224           0 :     filterInfo->filterType = 1;    /* high-pass filter, for largest eigenvalues */
     225           7 :   } else PetscCheck(a!=a1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"filterType==3 for smallest eigenvalues should be pre-converted to filterType==1 for largest eigenvalues");
     226             : 
     227             :   /* the following recipe follows Yousef Saad (2005, 2006) with a few minor adaptations / enhancements */
     228           7 :   halfPlateau = 0.5*(b1-a1)*opts->initialPlateau;    /* half length of the "plateau" of the (dual) base filter */
     229           7 :   leftDelta = (b1-a1)*opts->initialShiftStep;        /* initial left shift */
     230           7 :   rightDelta = leftDelta;                            /* initial right shift */
     231           7 :   opts->numGridPoints = PetscMax(opts->numGridPoints,(PetscInt)(2.0*(b-a)/halfPlateau));
     232           7 :   gridSize = (b-a) / (PetscReal)opts->numGridPoints;
     233             : 
     234          49 :   for (i=0;i<6;i++) intv[i] = 0.0;
     235           7 :   if (filterInfo->filterType == 2) {  /* for interior eigenvalues */
     236           7 :     npoints = 6;
     237           7 :     intv[0] = a;
     238           7 :     intv[5] = b;
     239             :     /* intv[1], intv[2], intv[3], and intv[4] to be determined */
     240             :   } else { /* filterType == 1 (or 3 with conversion), for extreme eigenvalues */
     241           0 :     npoints = 4;
     242           0 :     intv[0] = a;
     243           0 :     intv[3] = b;
     244             :     /* intv[1], and intv[2] to be determined */
     245             :   }
     246           7 :   z1 = a1 - leftDelta;
     247           7 :   z2 = b1 + rightDelta;
     248           7 :   filterInfo->filterOK = 0;  /* not yet found any OK filter */
     249             : 
     250             :   /* allocate matrices */
     251           7 :   PetscCall(PetscMalloc2((polyDeg+2)*(npoints-1),&polyFilter,(2*baseDeg+2)*(npoints-1),&baseFilter));
     252             : 
     253             :   /* initialize the intervals, mainly for the case opts->maxOuterIter == 0 */
     254           7 :   intervals[0] = intv[0];
     255           7 :   intervals[3] = intv[3];
     256           7 :   intervals[5] = intv[5];
     257           7 :   intervals[1] = z1;
     258           7 :   if (filterInfo->filterType == 2) {  /* a mid-pass filter for interior eigenvalues */
     259           7 :     intervals[4] = z2;
     260           7 :     c = (a1+b1) / 2.0;
     261           7 :     intervals[2] = c - halfPlateau;
     262           7 :     intervals[3] = c + halfPlateau;
     263             :   } else {  /* filterType == 1 (or 3 with conversion) for extreme eigenvalues */
     264           0 :     intervals[2] = z1 + (b1-z1)*opts->transIntervalRatio;
     265             :   }
     266             : 
     267             :   /* the main loop */
     268          71 :   for (numIter=1;numIter<=opts->maxOuterIter;numIter++) {
     269          71 :     if (z1 <= a) {  /* outer loop updates z1 and z2 */
     270          18 :       z1 = a;
     271          18 :       leftBoundaryMet = PETSC_TRUE;
     272             :     }
     273          71 :     if (filterInfo->filterType == 2) {  /* a <= z1 < (a1) */
     274          71 :       if (z2 >= b) {  /* a mid-pass filter for interior eigenvalues */
     275           0 :         z2 = b;
     276           0 :         rightBoundaryMet = PETSC_TRUE;
     277             :       }
     278             :       /* a <= z1 < c-h < c+h < z2 <= b, where h is halfPlateau */
     279             :       /* [z1, c-h] and [c+h, z2] are transition interval */
     280          71 :       intv[1] = z1;
     281          71 :       intv[4] = z2;
     282          71 :       c1 = z1 + halfPlateau;
     283          71 :       intv[2] = z1;           /* i.e. c1 - halfPlateau */
     284          71 :       intv[3] = c1 + halfPlateau;
     285          71 :       PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg));
     286          71 :       PetscCall(FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg));
     287             :       /* fc1 = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1); */
     288          71 :       c2 = z2 - halfPlateau;
     289          71 :       intv[2] = c2 - halfPlateau;
     290          71 :       intv[3] = z2;           /* i.e. c2 + halfPlateau */
     291          71 :       PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg));
     292          71 :       PetscCall(FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg));
     293          71 :       fc2 = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1);
     294          71 :       yLimitGap = PETSC_MAX_REAL;
     295          71 :       ii = opts->maxInnerIter;
     296         812 :       while (ii-- && !(yLimitGap <= opts->yLimitTol)) {
     297             :         /* recursive bisection to get c such that p(a1) are p(b1) approximately the same */
     298         741 :         c = (c1+c2) / 2.0;
     299         741 :         intv[2] = c - halfPlateau;
     300         741 :         intv[3] = c + halfPlateau;
     301         741 :         PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg));
     302         741 :         PetscCall(FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg));
     303         741 :         fc = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1);
     304         741 :         if (fc*fc2 < 0.0) {
     305             :           c1 = c;
     306             :           /* fc1 = fc; */
     307             :         } else {
     308         410 :           c2 = c;
     309         410 :           fc2 = fc;
     310             :         }
     311         741 :         yLimitGap = PetscAbsReal(fc);
     312             :       }
     313             :     } else {  /* filterType == 1 (or 3 with conversion) for extreme eigenvalues */
     314           0 :       intv[1] = z1;
     315           0 :       intv[2] = z1 + (b1-z1)*opts->transIntervalRatio;
     316           0 :       PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,4,HighLowFlags,baseDeg));
     317           0 :       PetscCall(FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,4,opts->intervalWeights,polyDeg));
     318             :     }
     319             :     /* polyFilter contains the coefficients of the polynomial filter which approximates phi(x)
     320             :        expanded in the `translated' Chebyshev basis */
     321             :     /* psi(x) = 1.0 - phi(x) is the dual base filter approximated by a polynomial in the form x*p(x) */
     322          71 :     yLeftLimit  = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1);
     323          71 :     yRightLimit = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1);
     324          71 :     yLimit  = (yLeftLimit < yRightLimit) ? yLeftLimit : yRightLimit;
     325          71 :     ySummit = (yLeftLimit > yRightLimit) ? yLeftLimit : yRightLimit;
     326             :     x = a1;
     327        2840 :     while ((x+=gridSize) < b1) {
     328        2769 :       y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
     329        2769 :       if (y < yLimit)  yLimit  = y;
     330        2769 :       if (y > ySummit) ySummit = y;
     331             :     }
     332             :     /* now yLimit is the minimum of x*p(x) for x in [a1, b1] */
     333          71 :     stepLeft  = PETSC_FALSE;
     334          71 :     stepRight = PETSC_FALSE;
     335          71 :     if ((yLimit < yLeftLimit && yLimit < yRightLimit) || yLimit < opts->yBottomLine) {
     336             :       /* very bad, step to see what will happen */
     337          25 :       stepLeft = PETSC_TRUE;
     338          25 :       if (filterInfo->filterType == 2) stepRight = PETSC_TRUE;
     339          46 :     } else if (filterInfo->filterType == 2) {
     340          46 :       if (yLeftLimit < yRightLimit) {
     341          31 :         if (yRightLimit-yLeftLimit > opts->yLimitTol) stepLeft = PETSC_TRUE;
     342          15 :       } else if (yLeftLimit-yRightLimit > opts->yLimitTol) stepRight = PETSC_TRUE;
     343             :     }
     344          40 :     if (!stepLeft) {
     345             :       yLeftBottom = yLeftLimit;
     346             :       x = a1;
     347        2100 :       while ((x-=gridSize) >= a) {
     348        2095 :         y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
     349        2095 :         if (y < yLeftBottom) yLeftBottom = y;
     350          41 :         else if (y > yLeftBottom) break;
     351             :       }
     352             :       yLeftSummit = yLeftBottom;
     353      119410 :       while ((x-=gridSize) >= a) {
     354      119364 :         y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
     355      119364 :         if (y > yLeftSummit) {
     356        1713 :           yLeftSummit = y;
     357        1713 :           if (yLeftSummit > yLimit*opts->yRippleLimit) {
     358             :             stepLeft = PETSC_TRUE;
     359             :             break;
     360             :           }
     361             :         }
     362      119364 :         if (y < yLeftBottom) yLeftBottom = y;
     363             :       }
     364             :     }
     365          71 :     if (filterInfo->filterType == 2 && !stepRight) {
     366             :       yRightBottom = yRightLimit;
     367             :       x = b1;
     368        2263 :       while ((x+=gridSize) <= b) {
     369        2263 :         y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
     370        2263 :         if (y < yRightBottom) yRightBottom = y;
     371          46 :         else if (y > yRightBottom) break;
     372             :       }
     373             :       yRightSummit = yRightBottom;
     374       97039 :       while ((x+=gridSize) <= b) {
     375       96993 :         y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
     376       96993 :         if (y > yRightSummit) {
     377        1735 :           yRightSummit = y;
     378        1735 :           if (yRightSummit > yLimit*opts->yRippleLimit) {
     379             :             stepRight = PETSC_TRUE;
     380             :             break;
     381             :           }
     382             :         }
     383       96993 :         if (y < yRightBottom) yRightBottom = y;
     384             :       }
     385             :     }
     386          71 :     if (!stepLeft && !stepRight) {
     387          46 :       if (filterInfo->filterType == 2) bottom = PetscMin(yLeftBottom,yRightBottom);
     388             :       else bottom = yLeftBottom;
     389          46 :       qIndex = 1.0 - (yLimit-bottom) / (ySummit-bottom);
     390          46 :       if (filterInfo->filterOK == 0 || filterInfo->filterQualityIndex < qIndex) {
     391             :         /* found the first OK filter or a better filter */
     392         126 :         for (i=0;i<6;i++) intervals[i] = intv[i];
     393          18 :         filterInfo->filterOK           = 1;
     394          18 :         filterInfo->filterQualityIndex = qIndex;
     395          18 :         filterInfo->numIter            = numIter;
     396          18 :         filterInfo->yLimit             = yLimit;
     397          18 :         filterInfo->ySummit            = ySummit;
     398          18 :         filterInfo->numLeftSteps       = numLeftSteps;
     399          18 :         filterInfo->yLeftSummit        = yLeftSummit;
     400          18 :         filterInfo->yLeftBottom        = yLeftBottom;
     401          18 :         if (filterInfo->filterType == 2) {
     402          18 :           filterInfo->yLimitGap        = yLimitGap;
     403          18 :           filterInfo->numRightSteps    = numRightSteps;
     404          18 :           filterInfo->yRightSummit     = yRightSummit;
     405          18 :           filterInfo->yRightBottom     = yRightBottom;
     406             :         }
     407             :         numMoreLooked = 0;
     408          28 :       } else if (++numMoreLooked == numLookMore) {
     409             :         /* filter has been optimal */
     410           7 :         filterInfo->filterOK = 2;
     411           7 :         break;
     412             :       }
     413             :       /* try stepping further to see whether it can improve */
     414          39 :       stepLeft = PETSC_TRUE;
     415          39 :       if (filterInfo->filterType == 2) stepRight = PETSC_TRUE;
     416             :     }
     417             :     /* check whether we can really "step" */
     418          64 :     if (leftBoundaryMet) {
     419          14 :       if (filterInfo->filterType == 1 || rightBoundaryMet) break;  /* cannot step further, so break the loop */
     420          14 :       if (stepLeft) {
     421             :         /* cannot step left, so try stepping right */
     422          14 :         stepLeft  = PETSC_FALSE;
     423          14 :         stepRight = PETSC_TRUE;
     424             :       }
     425             :     }
     426          64 :     if (rightBoundaryMet && stepRight) {
     427             :       /* cannot step right, so try stepping left */
     428             :       stepRight = PETSC_FALSE;
     429             :       stepLeft  = PETSC_TRUE;
     430             :     }
     431             :     /* now "step" */
     432          64 :     if (stepLeft) {
     433          50 :       numLeftSteps++;
     434          50 :       if (filterInfo->filterType == 2) leftDelta *= opts->shiftStepExpanRate; /* expand the step for faster convergence */
     435          50 :       z1 -= leftDelta;
     436             :     }
     437          64 :     if (stepRight) {
     438          64 :       numRightSteps++;
     439          64 :       rightDelta *= opts->shiftStepExpanRate;  /* expand the step for faster convergence */
     440          64 :       z2 += rightDelta;
     441             :     }
     442          64 :     if (filterInfo->filterType == 2) {
     443             :       /* shrink the "plateau" of the (dual) base filter */
     444          64 :       if (stepLeft && stepRight) halfPlateau /= opts->plateauShrinkRate;
     445          14 :       else halfPlateau /= PetscSqrtReal(opts->plateauShrinkRate);
     446             :     }
     447             :   }
     448           7 :   PetscCheck(filterInfo->filterOK,PETSC_COMM_SELF,PETSC_ERR_CONV_FAILED,"STFILTER cannot get the filter specified; please adjust your filter parameters (e.g. increasing the polynomial degree)");
     449             : 
     450           7 :   filterInfo->totalNumIter = numIter;
     451           7 :   PetscCall(PetscFree2(polyFilter,baseFilter));
     452           7 :   PetscFunctionReturn(PETSC_SUCCESS);
     453             : }
     454             : 
     455             : /* ////////////////////////////////////////////////////////////////////////////
     456             :    //    Chebyshev Polynomials
     457             :    //////////////////////////////////////////////////////////////////////////// */
     458             : 
     459             : /*
     460             :    FILTLAN function ExpandNewtonPolynomialInChebyshevBasis
     461             : 
     462             :    translate the coefficients of a Newton polynomial to the coefficients
     463             :    in a basis of the `translated' (scale-and-shift) Chebyshev polynomials
     464             : 
     465             :    input:
     466             :    a Newton polynomial defined by vectors a and x:
     467             :        P(z) = a(1) + a(2)*(z-x(1)) + a(3)*(z-x(1))*(z-x(2)) + ... + a(n)*(z-x(1))*...*(z-x(n-1))
     468             :    the interval [aa,bb] defines the `translated' Chebyshev polynomials S_i(z) = T_i((z-c)/h),
     469             :        where c=(aa+bb)/2 and h=(bb-aa)/2, and T_i is the Chebyshev polynomial of the first kind
     470             :    note that T_i is defined by T_0(z)=1, T_1(z)=z, and T_i(z)=2*z*T_{i-1}(z)+T_{i-2}(z) for i>=2
     471             : 
     472             :    output:
     473             :    a vector q containing the Chebyshev coefficients:
     474             :        P(z) = q(1)*S_0(z) + q(2)*S_1(z) + ... + q(n)*S_{n-1}(z)
     475             : */
     476        1780 : static inline PetscErrorCode FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(PetscInt n,PetscReal aa,PetscReal bb,PetscReal *a,PetscReal *x,PetscReal *q,PetscReal *q2)
     477             : {
     478        1780 :   PetscInt  m,mm;
     479        1780 :   PetscReal *sa,*sx,*sq,*sq2,c,c2,h,h2;
     480             : 
     481        1780 :   PetscFunctionBegin;
     482        1780 :   sa = a+n;    /* pointers for traversing a and x */
     483        1780 :   sx = x+n-1;
     484        1780 :   *q = *--sa;  /* set q[0] = a(n) */
     485             : 
     486        1780 :   c = (aa+bb)/2.0;
     487        1780 :   h = (bb-aa)/2.0;
     488        1780 :   h2 = h/2.0;
     489             : 
     490       39160 :   for (m=1;m<=n-1;m++) {  /* the main loop for translation */
     491             : 
     492             :     /* compute q2[0:m-1] = (c-x[n-m-1])*q[0:m-1] */
     493       37380 :     mm = m;
     494       37380 :     sq = q;
     495       37380 :     sq2 = q2;
     496       37380 :     c2 = c-(*--sx);
     497      448560 :     while (mm--) *(sq2++) = c2*(*sq++);
     498       37380 :     *sq2 = 0.0;         /* q2[m] = 0.0 */
     499       37380 :     *(q2+1) += h*(*q);  /* q2[1] = q2[1] + h*q[0] */
     500             : 
     501             :     /* compute q2[0:m-2] = q2[0:m-2] + h2*q[1:m-1] */
     502       37380 :     mm = m-1;
     503       37380 :     sq2 = q2;
     504       37380 :     sq = q+1;
     505      411180 :     while (mm--) *(sq2++) += h2*(*sq++);
     506             : 
     507             :     /* compute q2[2:m] = q2[2:m] + h2*q[1:m-1] */
     508       37380 :     mm = m-1;
     509       37380 :     sq2 = q2+2;
     510       37380 :     sq = q+1;
     511      411180 :     while (mm--) *(sq2++) += h2*(*sq++);
     512             : 
     513             :     /* compute q[0:m] = q2[0:m] */
     514       37380 :     mm = m+1;
     515       37380 :     sq2 = q2;
     516       37380 :     sq = q;
     517      485940 :     while (mm--) *sq++ = *sq2++;
     518       37380 :     *q += (*--sa);      /* q[0] = q[0] + p[n-m-1] */
     519             :   }
     520        1780 :   PetscFunctionReturn(PETSC_SUCCESS);
     521             : }
     522             : 
     523             : /*
     524             :    FILTLAN function PolynomialEvaluationInChebyshevBasis
     525             : 
     526             :    evaluate P(z) at z=z0, where P(z) is a polynomial expanded in a basis of
     527             :    the `translated' (i.e. scale-and-shift) Chebyshev polynomials
     528             : 
     529             :    input:
     530             :    c is a vector of Chebyshev coefficients which defines the polynomial
     531             :        P(z) = c(1)*S_0(z) + c(2)*S_1(z) + ... + c(n)*S_{n-1}(z),
     532             :    where S_i is the `translated' Chebyshev polynomial S_i((z-c)/h) = T_i(z), with
     533             :        c = (intv(j)+intv(j+1)) / 2,  h = (intv(j+1)-intv(j)) / 2
     534             :    note that T_i(z) is the Chebyshev polynomial of the first kind,
     535             :        T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
     536             : 
     537             :    output:
     538             :    the evaluated value of P(z) at z=z0
     539             : */
     540      225250 : static inline PetscReal FILTLAN_PolynomialEvaluationInChebyshevBasis(PetscReal *pp,PetscInt m,PetscInt idx,PetscReal z0,PetscReal aa,PetscReal bb)
     541             : {
     542      225250 :   PetscInt  ii,deg1;
     543      225250 :   PetscReal y,zz,t0,t1,t2,*sc;
     544             : 
     545      225250 :   PetscFunctionBegin;
     546      225250 :   deg1 = m;
     547      225250 :   if (aa==-1.0 && bb==1.0) zz = z0;  /* treat it as a special case to reduce rounding errors */
     548      225250 :   else zz = (aa==bb)? 0.0 : -1.0+2.0*(z0-aa)/(bb-aa);
     549             : 
     550             :   /* compute y = P(z0), where we utilize the Chebyshev recursion */
     551      225250 :   sc = pp+(idx-1)*m;   /* sc points to column idx of pp */
     552      225250 :   y = *sc++;
     553      225250 :   t1 = 1.0; t2 = zz;
     554      225250 :   ii = deg1-1;
     555    44393250 :   while (ii--) {
     556             :     /* Chebyshev recursion: T_0(zz)=1, T_1(zz)=zz, and T_{i+1}(zz) = 2*zz*T_i(zz) + T_{i-1}(zz) for i>=2
     557             :        the values of T_{i+1}(zz), T_i(zz), T_{i-1}(zz) are stored in t0, t1, t2, respectively */
     558    44168000 :     t0 = 2*zz*t1 - t2;
     559             :     /* it also works for the base case / the first iteration, where t0 equals 2*zz*1-zz == zz which is T_1(zz) */
     560    44168000 :     t2 = t1;
     561    44168000 :     t1 = t0;
     562    44168000 :     y += (*sc++)*t0;
     563             :   }
     564      225250 :   PetscFunctionReturn(y);
     565             : }
     566             : 
     567             : #define basisTranslated PETSC_TRUE
     568             : /*
     569             :    FILTLAN function PiecewisePolynomialEvaluationInChebyshevBasis
     570             : 
     571             :    evaluate P(z) at z=z0, where P(z) is a piecewise polynomial expanded
     572             :    in a basis of the (optionally translated, i.e. scale-and-shift) Chebyshev polynomials for each interval
     573             : 
     574             :    input:
     575             :    intv is a vector which defines the intervals; the j-th interval is [intv(j), intv(j+1))
     576             :    pp is a matrix of Chebyshev coefficients which defines a piecewise polynomial P(z)
     577             :    in a basis of the `translated' Chebyshev polynomials in each interval
     578             :    the polynomial P_j(z) in the j-th interval, i.e. when z is in [intv(j), intv(j+1)), is defined by the j-th column of pp:
     579             :        if basisTranslated == false, then
     580             :            P_j(z) = pp(1,j)*T_0(z) + pp(2,j)*T_1(z) + ... + pp(n,j)*T_{n-1}(z),
     581             :        where T_i(z) is the Chebyshev polynomial of the first kind,
     582             :            T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
     583             :        if basisTranslated == true, then
     584             :            P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
     585             :        where S_i is the `translated' Chebyshev polynomial S_i((z-c)/h) = T_i(z), with
     586             :            c = (intv(j)+intv(j+1)) / 2,  h = (intv(j+1)-intv(j)) / 2
     587             : 
     588             :    output:
     589             :    the evaluated value of P(z) at z=z0
     590             : 
     591             :    note that if z0 falls below the first interval, then the polynomial in the first interval will be used to evaluate P(z0)
     592             :              if z0 flies over  the last  interval, then the polynomial in the last  interval will be used to evaluate P(z0)
     593             : */
     594      225250 : static inline PetscReal FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(PetscReal *pp,PetscInt m,PetscReal *intv,PetscInt npoints,PetscReal z0)
     595             : {
     596      225250 :   PetscReal *sintv,aa,bb,resul;
     597      225250 :   PetscInt  idx;
     598             : 
     599      225250 :   PetscFunctionBegin;
     600             :   /* determine the interval which contains z0 */
     601      225250 :   sintv = &intv[1];
     602      225250 :   idx = 1;
     603      225250 :   if (npoints>2 && z0 >= *sintv) {
     604      104396 :     sintv++;
     605      405676 :     while (++idx < npoints-1) {
     606      307294 :       if (z0 < *sintv) break;
     607      301280 :       sintv++;
     608             :     }
     609             :   }
     610             :   /* idx==1 if npoints<=2; otherwise idx satisfies:
     611             :          intv(idx) <= z0 < intv(idx+1),  if 2 <= idx <= npoints-2
     612             :          z0 < intv(idx+1),               if idx == 1
     613             :          intv(idx) <= z0,                if idx == npoints-1
     614             :      in addition, sintv points to &intv(idx+1) */
     615      225250 :   if (basisTranslated) {
     616             :     /* the basis consists of `translated' Chebyshev polynomials */
     617             :     /* find the interval of concern, [aa,bb] */
     618      225250 :     aa = *(sintv-1);
     619      225250 :     bb = *sintv;
     620      225250 :     resul = FILTLAN_PolynomialEvaluationInChebyshevBasis(pp,m,idx,z0,aa,bb);
     621             :   } else {
     622             :     /* the basis consists of standard Chebyshev polynomials, with interval [-1.0,1.0] for integration */
     623             :     resul = FILTLAN_PolynomialEvaluationInChebyshevBasis(pp,m,idx,z0,-1.0,1.0);
     624             :   }
     625      225250 :   PetscFunctionReturn(resul);
     626             : }
     627             : 
     628             : /*
     629             :    FILTLAN function PiecewisePolynomialInnerProductInChebyshevBasis
     630             : 
     631             :    compute the weighted inner product of two piecewise polynomials expanded
     632             :    in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials for each interval
     633             : 
     634             :    pp and qq are two matrices of Chebyshev coefficients which define the piecewise polynomials P(z) and Q(z), respectively
     635             :    for z in the j-th interval, P(z) equals
     636             :        P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
     637             :    and Q(z) equals
     638             :        Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(n,j)*S_{n-1}(z),
     639             :    where S_i(z) is the `translated' Chebyshev polynomial in that interval,
     640             :        S_i((z-c)/h) = T_i(z),  c = (aa+bb)) / 2,  h = (bb-aa) / 2,
     641             :    with T_i(z) the Chebyshev polynomial of the first kind
     642             :        T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
     643             : 
     644             :    the (scaled) j-th interval inner product is defined by
     645             :        <P_j,Q_j> = (Pi/2)*(pp(1,j)*qq(1,j) + sum_{k} pp(k,j)*qq(k,j)),
     646             :    which comes from the property
     647             :        <T_0,T_0>=pi, <T_i,T_i>=pi/2 for i>=1, and <T_i,T_j>=0 for i!=j
     648             : 
     649             :    the weighted inner product is <P,Q> = sum_{j} intervalWeights(j)*<P_j,Q_j>,
     650             :    which is the return value
     651             : 
     652             :    note that for unit weights, pass an empty vector of intervalWeights (i.e. of length 0)
     653             : */
     654      607568 : static inline PetscReal FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(PetscReal *pp,PetscInt prows,PetscInt pcols,PetscInt ldp,PetscReal *qq,PetscInt qrows,PetscInt qcols,PetscInt ldq,const PetscReal *intervalWeights)
     655             : {
     656      607568 :   PetscInt  nintv,deg1,i,k;
     657      607568 :   PetscReal *sp,*sq,ans=0.0,ans2;
     658             : 
     659      607568 :   PetscFunctionBegin;
     660      607568 :   deg1 = PetscMin(prows,qrows);  /* number of effective coefficients, one more than the effective polynomial degree */
     661      607568 :   if (PetscUnlikely(!deg1)) PetscFunctionReturn(0.0);
     662      607568 :   nintv = PetscMin(pcols,qcols);  /* number of intervals */
     663             : 
     664             :   /* scaled by intervalWeights(i) in the i-th interval (we assume intervalWeights[] are always provided).
     665             :      compute ans = sum_{i=1,...,nintv} intervalWeights(i)*[ pp(1,i)*qq(1,i) + sum_{k=1,...,deg} pp(k,i)*qq(k,i) ] */
     666     3645408 :   for (i=0;i<nintv;i++) {   /* compute ans2 = pp(1,i)*qq(1,i) + sum_{k=1,...,deg} pp(k,i)*qq(k,i) */
     667     3037840 :     sp = pp+i*ldp;
     668     3037840 :     sq = qq+i*ldq;
     669     3037840 :     ans2 = (*sp) * (*sq);  /* the first term pp(1,i)*qq(1,i) is being added twice */
     670   173048070 :     for (k=0;k<deg1;k++) ans2 += (*sp++)*(*sq++);  /* add pp(k,i)*qq(k,i) */
     671     3037840 :     ans += ans2*intervalWeights[i];  /* compute ans += ans2*intervalWeights(i) */
     672             :   }
     673      607568 :   PetscFunctionReturn(ans*PETSC_PI/2.0);
     674             : }
     675             : 
     676             : /*
     677             :    FILTLAN function PiecewisePolynomialInChebyshevBasisMultiplyX
     678             : 
     679             :    compute Q(z) = z*P(z), where P(z) and Q(z) are piecewise polynomials expanded
     680             :    in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials for each interval
     681             : 
     682             :    P(z) and Q(z) are stored as matrices of Chebyshev coefficients pp and qq, respectively
     683             : 
     684             :    For z in the j-th interval, P(z) equals
     685             :        P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
     686             :    and Q(z) equals
     687             :        Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(n,j)*S_{n-1}(z),
     688             :    where S_i(z) is the `translated' Chebyshev polynomial in that interval,
     689             :        S_i((z-c)/h) = T_i(z),  c = (intv(j)+intv(j+1))) / 2,  h = (intv(j+1)-intv(j)) / 2,
     690             :    with T_i(z) the Chebyshev polynomial of the first kind
     691             :        T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
     692             : 
     693             :    the returned matrix is qq which represents Q(z) = z*P(z)
     694             : */
     695      202968 : static inline PetscErrorCode FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(PetscReal *pp,PetscInt deg1,PetscInt ldp,PetscReal *intv,PetscInt nintv,PetscReal *qq,PetscInt ldq)
     696             : {
     697      202968 :   PetscInt  i,j;
     698      202968 :   PetscReal c,h,h2,tmp,*sp,*sq,*sp2,*sq2;
     699             : 
     700      202968 :   PetscFunctionBegin;
     701     1217808 :   for (j=0;j<nintv;j++) {    /* consider interval between intv(j) and intv(j+1) */
     702     1014840 :     sp = pp+j*ldp;
     703     1014840 :     sq = qq+j*ldq;
     704     1014840 :     sp2 = sp;
     705     1014840 :     sq2 = sq+1;
     706     1014840 :     c = 0.5*(intv[j] + intv[j+1]);   /* compute c = (intv(j) + intv(j+1))/2 */
     707     1014840 :     h = 0.5*(intv[j+1] - intv[j]);   /* compute h = (intv(j+1) - intv(j))/2  and  h2 = h/2 */
     708     1014840 :     h2 = 0.5*h;
     709     1014840 :     i = deg1;
     710    75341770 :     while (i--) *sq++ = c*(*sp++);    /* compute q(1:deg1,j) = c*p(1:deg1,j) */
     711     1014840 :     *sq++ = 0.0;                      /* set q(deg1+1,j) = 0.0 */
     712     1014840 :     *(sq2++) += h*(*sp2++);           /* compute q(2,j) = q(2,j) + h*p(1,j) */
     713     1014840 :     i = deg1-1;
     714    74326930 :     while (i--) {       /* compute q(3:deg1+1,j) += h2*p(2:deg1,j) and then q(1:deg1-1,j) += h2*p(2:deg1,j) */
     715    73312090 :       tmp = h2*(*sp2++);
     716    73312090 :       *(sq2-2) += tmp;
     717    73312090 :       *(sq2++) += tmp;
     718             :     }
     719             :   }
     720      202968 :   PetscFunctionReturn(PETSC_SUCCESS);
     721             : }
     722             : 
     723             : /* ////////////////////////////////////////////////////////////////////////////
     724             :    //    Conjugate Residual Method in the Polynomial Space
     725             :    //////////////////////////////////////////////////////////////////////////// */
     726             : 
     727             : /*
     728             :     B := alpha*A + beta*B
     729             : 
     730             :     A,B are nxk
     731             : */
     732      807434 : static inline PetscErrorCode Mat_AXPY_BLAS(PetscInt n,PetscInt k,PetscReal alpha,const PetscReal *A,PetscInt lda,PetscReal beta,PetscReal *B,PetscInt ldb)
     733             : {
     734      807434 :   PetscInt       i,j;
     735             : 
     736      807434 :   PetscFunctionBegin;
     737      807434 :   if (beta==(PetscReal)1.0) {
     738   152235100 :     for (j=0;j<k;j++) for (i=0;i<n;i++) B[i+j*ldb] += alpha*A[i+j*lda];
     739      404600 :     PetscCall(PetscLogFlops(2.0*n*k));
     740             :   } else {
     741   152046929 :     for (j=0;j<k;j++) for (i=0;i<n;i++) B[i+j*ldb] = alpha*A[i+j*lda] + beta*B[i+j*ldb];
     742      402834 :     PetscCall(PetscLogFlops(3.0*n*k));
     743             :   }
     744      807434 :   PetscFunctionReturn(PETSC_SUCCESS);
     745             : }
     746             : 
     747             : /*
     748             :    FILTLAN function FilteredConjugateResidualPolynomial
     749             : 
     750             :    ** Conjugate Residual Method in the Polynomial Space
     751             : 
     752             :    this routine employs a conjugate-residual-type algorithm in polynomial space to minimize ||P(z)-Q(z)||_w,
     753             :    where P(z), the base filter, is the input piecewise polynomial, and
     754             :          Q(z) is the output polynomial satisfying Q(0)==1, i.e. the constant term of Q(z) is 1
     755             :    niter is the number of conjugate-residual iterations; therefore, the degree of Q(z) is up to niter+1
     756             :    both P(z) and Q(z) are expanded in the `translated' (scale-and-shift) Chebyshev basis for each interval,
     757             :    and presented as matrices of Chebyshev coefficients, denoted by pp and qq, respectively
     758             : 
     759             :    input:
     760             :    intv is a vector which defines the intervals; the j-th interval is [intv(j),intv(j+1))
     761             :    w is a vector of Chebyshev weights; the weight of j-th interval is w(j)
     762             :        the interval weights define the inner product of two continuous functions and then
     763             :        the derived w-norm ||P(z)-Q(z)||_w
     764             :    pp is a matrix of Chebyshev coefficients which defines the piecewise polynomial P(z)
     765             :    to be specific, for z in [intv(j), intv(j+1)), P(z) equals
     766             :        P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(niter+2,j)*S_{niter+1}(z),
     767             :    where S_i(z) is the `translated' Chebyshev polynomial in that interval,
     768             :        S_i((z-c)/h) = T_i(z),  c = (intv(j)+intv(j+1))) / 2,  h = (intv(j+1)-intv(j)) / 2,
     769             :    with T_i(z) the Chebyshev polynomial of the first kind,
     770             :        T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
     771             : 
     772             :    output:
     773             :    the return matrix, denoted by qq, represents a polynomial Q(z) with degree up to 1+niter
     774             :    and satisfying Q(0)==1, such that ||P(z))-Q(z)||_w is minimized
     775             :    this polynomial Q(z) is expanded in the `translated' Chebyshev basis for each interval
     776             :    to be precise, considering z in [intv(j), intv(j+1)), Q(z) equals
     777             :        Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(niter+2,j)*S_{niter+1}(z)
     778             : 
     779             :    note:
     780             :    1. since Q(0)==1, P(0)==1 is expected; if P(0)!=1, one can translate P(z)
     781             :       for example, if P(0)==0, one can use 1-P(z) as input instead of P(z)
     782             :    2. typically, the base filter, defined by pp and intv, is from Hermite interpolation
     783             :       in intervals [intv(j),intv(j+1)) for j=1,...,nintv, with nintv the number of intervals
     784             : */
     785         883 : static PetscErrorCode FILTLAN_FilteredConjugateResidualPolynomial(PetscReal *cpol,PetscReal *baseFilter,PetscInt nbase,PetscReal *intv,PetscInt m,PetscReal *intervalWeights,PetscInt niter)
     786             : {
     787         883 :   PetscInt       i,j,srpol,scpol,sarpol,sppol,sappol,ld,nintv;
     788         883 :   PetscReal      rho,rho0,rho1,den,bet,alp,alp0,*ppol,*rpol,*appol,*arpol;
     789             : 
     790         883 :   PetscFunctionBegin;
     791         883 :   nintv = m-1;
     792         883 :   ld = niter+2;  /* leading dimension */
     793         883 :   PetscCall(PetscCalloc4(ld*nintv,&ppol,ld*nintv,&rpol,ld*nintv,&appol,ld*nintv,&arpol));
     794         883 :   PetscCall(PetscArrayzero(cpol,ld*nintv));
     795             :   /* initialize polynomial ppol to be 1 (i.e. multiplicative identity) in all intervals */
     796        5298 :   sppol = 2;
     797        5298 :   srpol = 2;
     798        5298 :   scpol = 2;
     799        5298 :   for (j=0;j<nintv;j++) {
     800        4415 :     ppol[j*ld] = 1.0;
     801        4415 :     rpol[j*ld] = 1.0;
     802        4415 :     cpol[j*ld] = 1.0;
     803             :   }
     804             :   /* ppol is the initial p-polynomial (corresponding to the A-conjugate vector p in CG)
     805             :      rpol is the r-polynomial (corresponding to the residual vector r in CG)
     806             :      cpol is the "corrected" residual polynomial (result of this function) */
     807         883 :   sappol = 3;
     808         883 :   sarpol = 3;
     809         883 :   PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(ppol,sppol,ld,intv,nintv,appol,ld));
     810       16777 :   for (i=0;i<3;i++) for (j=0;j<nintv;j++) arpol[i+j*ld] = appol[i+j*ld];
     811         883 :   rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
     812      117583 :   for (i=0;i<niter;i++) {
     813      116700 :     den = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(appol,sappol,nintv,ld,appol,sappol,nintv,ld,intervalWeights);
     814      116700 :     alp0 = rho/den;
     815      116700 :     rho1 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(baseFilter,nbase,nintv,nbase,appol,sappol,nintv,ld,intervalWeights);
     816      116700 :     alp  = (rho-rho1)/den;
     817      116700 :     srpol++;
     818      116700 :     scpol++;
     819      116700 :     PetscCall(Mat_AXPY_BLAS(srpol,nintv,-alp0,appol,ld,1.0,rpol,ld));
     820      116700 :     PetscCall(Mat_AXPY_BLAS(scpol,nintv,-alp,appol,ld,1.0,cpol,ld));
     821      116700 :     if (i+1 == niter) break;
     822      115817 :     sarpol++;
     823      115817 :     PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(rpol,srpol,ld,intv,nintv,arpol,ld));
     824      115817 :     rho0 = rho;
     825      115817 :     rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
     826      115817 :     bet  = rho / rho0;
     827      115817 :     sppol++;
     828      115817 :     sappol++;
     829      115817 :     PetscCall(Mat_AXPY_BLAS(sppol,nintv,1.0,rpol,ld,bet,ppol,ld));
     830      115817 :     PetscCall(Mat_AXPY_BLAS(sappol,nintv,1.0,arpol,ld,bet,appol,ld));
     831             :   }
     832         883 :   PetscCall(PetscFree4(ppol,rpol,appol,arpol));
     833         883 :   PetscFunctionReturn(PETSC_SUCCESS);
     834             : }
     835             : 
     836             : /*
     837             :    FILTLAN function FilteredConjugateResidualMatrixPolynomialVectorProduct
     838             : 
     839             :    this routine employs a conjugate-residual-type algorithm in polynomial space to compute
     840             :    x = x0 + s(A)*r0 with r0 = b - A*x0, such that ||(1-P(z))-z*s(z)||_w is minimized, where
     841             :    P(z) is a given piecewise polynomial, called the base filter,
     842             :    s(z) is a polynomial of degree up to niter, the number of conjugate-residual iterations,
     843             :    and b and x0 are given vectors
     844             : 
     845             :    note that P(z) is expanded in the `translated' (scale-and-shift) Chebyshev basis for each interval,
     846             :    and presented as a matrix of Chebyshev coefficients pp
     847             : 
     848             :    input:
     849             :    A is a sparse matrix
     850             :    x0, b are vectors
     851             :    niter is the number of conjugate-residual iterations
     852             :    intv is a vector which defines the intervals; the j-th interval is [intv(j),intv(j+1))
     853             :    w is a vector of Chebyshev weights; the weight of j-th interval is w(j)
     854             :        the interval weights define the inner product of two continuous functions and then
     855             :        the derived w-norm ||P(z)-Q(z)||_w
     856             :    pp is a matrix of Chebyshev coefficients which defines the piecewise polynomial P(z)
     857             :    to be specific, for z in [intv(j), intv(j+1)), P(z) equals
     858             :        P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(niter+2,j)*S_{niter+1}(z),
     859             :    where S_i(z) is the `translated' Chebyshev polynomial in that interval,
     860             :        S_i((z-c)/h) = T_i(z),  c = (intv(j)+intv(j+1))) / 2,  h = (intv(j+1)-intv(j)) / 2,
     861             :    with T_i(z) the Chebyshev polynomial of the first kind,
     862             :        T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
     863             :    tol is the tolerance; if the residual polynomial in z-norm is dropped by a factor lower
     864             :        than tol, then stop the conjugate-residual iteration
     865             : 
     866             :    output:
     867             :    the return vector is x = x0 + s(A)*r0 with r0 = b - A*x0, such that ||(1-P(z))-z*s(z)||_w is minimized,
     868             :    subject to that s(z) is a polynomial of degree up to niter, where P(z) is the base filter
     869             :    in short, z*s(z) approximates 1-P(z)
     870             : 
     871             :    note:
     872             :    1. since z*s(z) approximates 1-P(z), P(0)==1 is expected; if P(0)!=1, one can translate P(z)
     873             :       for example, if P(0)==0, one can use 1-P(z) as input instead of P(z)
     874             :    2. typically, the base filter, defined by pp and intv, is from Hermite interpolation
     875             :       in intervals [intv(j),intv(j+1)) for j=1,...,nintv, with nintv the number of intervals
     876             :    3. a common application is to compute R(A)*b, where R(z) approximates 1-P(z)
     877             :       in this case, one can set x0 = 0 and then the return vector is x = s(A)*b, where
     878             :       z*s(z) approximates 1-P(z); therefore, A*x is the wanted R(A)*b
     879             : */
     880         642 : static PetscErrorCode FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProduct(Mat A,Vec b,Vec x,PetscReal *baseFilter,PetscInt nbase,PetscReal *intv,PetscInt nintv,PetscReal *intervalWeights,PetscInt niter,Vec *work)
     881             : {
     882         642 :   PetscInt       i,j,srpol,scpol,sarpol,sppol,sappol,ld;
     883         642 :   PetscReal      rho,rho0,rho00,rho1,den,bet,alp,alp0,*cpol,*ppol,*rpol,*appol,*arpol,tol=0.0;
     884         642 :   Vec            r=work[0],p=work[1],ap=work[2],w=work[3];
     885         642 :   PetscScalar    alpha;
     886             : 
     887         642 :   PetscFunctionBegin;
     888         642 :   ld = niter+3;  /* leading dimension */
     889         642 :   PetscCall(PetscCalloc5(ld*nintv,&ppol,ld*nintv,&rpol,ld*nintv,&cpol,ld*nintv,&appol,ld*nintv,&arpol));
     890             :   /* initialize polynomial ppol to be 1 (i.e. multiplicative identity) in all intervals */
     891        3852 :   sppol = 2;
     892        3852 :   srpol = 2;
     893        3852 :   scpol = 2;
     894        3852 :   for (j=0;j<nintv;j++) {
     895        3210 :     ppol[j*ld] = 1.0;
     896        3210 :     rpol[j*ld] = 1.0;
     897        3210 :     cpol[j*ld] = 1.0;
     898             :   }
     899             :   /* ppol is the initial p-polynomial (corresponding to the A-conjugate vector p in CG)
     900             :      rpol is the r-polynomial (corresponding to the residual vector r in CG)
     901             :      cpol is the "corrected" residual polynomial */
     902         642 :   sappol = 3;
     903         642 :   sarpol = 3;
     904         642 :   PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(ppol,sppol,ld,intv,nintv,appol,ld));
     905       12198 :   for (i=0;i<3;i++) for (j=0;j<nintv;j++) arpol[i+j*ld] = appol[i+j*ld];
     906         642 :   rho00 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
     907         642 :   rho = rho00;
     908             : 
     909             :   /* corrected CR in vector space */
     910             :   /* we assume x0 is always 0 */
     911         642 :   PetscCall(VecSet(x,0.0));
     912         642 :   PetscCall(VecCopy(b,r));     /* initial residual r = b-A*x0 */
     913         642 :   PetscCall(VecCopy(r,p));     /* p = r */
     914         642 :   PetscCall(MatMult(A,p,ap));  /* ap = A*p */
     915             : 
     916       81042 :   for (i=0;i<niter;i++) {
     917             :     /* iteration in the polynomial space */
     918       80400 :     den = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(appol,sappol,nintv,ld,appol,sappol,nintv,ld,intervalWeights);
     919       80400 :     alp0 = rho/den;
     920       80400 :     rho1 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(baseFilter,nbase,nintv,nbase,appol,sappol,nintv,ld,intervalWeights);
     921       80400 :     alp  = (rho-rho1)/den;
     922       80400 :     srpol++;
     923       80400 :     scpol++;
     924       80400 :     PetscCall(Mat_AXPY_BLAS(srpol,nintv,-alp0,appol,ld,1.0,rpol,ld));
     925       80400 :     PetscCall(Mat_AXPY_BLAS(scpol,nintv,-alp,appol,ld,1.0,cpol,ld));
     926       80400 :     sarpol++;
     927       80400 :     PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(rpol,srpol,ld,intv,nintv,arpol,ld));
     928       80400 :     rho0 = rho;
     929       80400 :     rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
     930             : 
     931             :     /* update x in the vector space */
     932       80400 :     alpha = alp;
     933       80400 :     PetscCall(VecAXPY(x,alpha,p));   /* x += alp*p */
     934       80400 :     if (rho < tol*rho00) break;
     935             : 
     936             :     /* finish the iteration in the polynomial space */
     937       80400 :     bet = rho / rho0;
     938       80400 :     sppol++;
     939       80400 :     sappol++;
     940       80400 :     PetscCall(Mat_AXPY_BLAS(sppol,nintv,1.0,rpol,ld,bet,ppol,ld));
     941       80400 :     PetscCall(Mat_AXPY_BLAS(sappol,nintv,1.0,arpol,ld,bet,appol,ld));
     942             : 
     943             :     /* finish the iteration in the vector space */
     944       80400 :     alpha = -alp0;
     945       80400 :     PetscCall(VecAXPY(r,alpha,ap));    /* r -= alp0*ap */
     946       80400 :     alpha = bet;
     947       80400 :     PetscCall(VecAYPX(p,alpha,r));     /* p = r + bet*p */
     948       80400 :     PetscCall(MatMult(A,r,w));         /* ap = A*r + bet*ap */
     949       80400 :     PetscCall(VecAYPX(ap,alpha,w));
     950             :   }
     951         642 :   PetscCall(PetscFree5(ppol,rpol,cpol,appol,arpol));
     952         642 :   PetscFunctionReturn(PETSC_SUCCESS);
     953             : }
     954             : 
     955             : /*
     956             :    Gateway to FILTLAN for evaluating y=p(A)*x
     957             : */
     958         642 : static PetscErrorCode MatMult_FILTLAN(Mat A,Vec x,Vec y)
     959             : {
     960         642 :   ST             st;
     961         642 :   ST_FILTER      *ctx;
     962         642 :   PetscInt       npoints;
     963             : 
     964         642 :   PetscFunctionBegin;
     965         642 :   PetscCall(MatShellGetContext(A,&st));
     966         642 :   ctx = (ST_FILTER*)st->data;
     967         642 :   npoints = (ctx->filterInfo->filterType == 2)? 6: 4;
     968         642 :   PetscCall(FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProduct(ctx->T,x,y,ctx->baseFilter,2*ctx->baseDegree+2,ctx->intervals,npoints-1,ctx->opts->intervalWeights,ctx->polyDegree,st->work));
     969         642 :   PetscCall(VecCopy(y,st->work[0]));
     970         642 :   PetscCall(MatMult(ctx->T,st->work[0],y));
     971         642 :   PetscFunctionReturn(PETSC_SUCCESS);
     972             : }
     973             : 
     974             : /* Block version of FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProduct */
     975          26 : static PetscErrorCode FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProductBlock(Mat A,Mat B,Mat C,PetscReal *baseFilter,PetscInt nbase,PetscReal *intv,PetscInt nintv,PetscReal *intervalWeights,PetscInt niter,Vec *work,Mat R,Mat P,Mat AP,Mat W)
     976             : {
     977          26 :   PetscInt       i,j,srpol,scpol,sarpol,sppol,sappol,ld;
     978          26 :   PetscReal      rho,rho0,rho00,rho1,den,bet,alp,alp0,*cpol,*ppol,*rpol,*appol,*arpol,tol=0.0;
     979          26 :   PetscScalar    alpha;
     980             : 
     981          26 :   PetscFunctionBegin;
     982          26 :   ld = niter+3;  /* leading dimension */
     983          26 :   PetscCall(PetscCalloc5(ld*nintv,&ppol,ld*nintv,&rpol,ld*nintv,&cpol,ld*nintv,&appol,ld*nintv,&arpol));
     984             :   /* initialize polynomial ppol to be 1 (i.e. multiplicative identity) in all intervals */
     985         156 :   sppol = 2;
     986         156 :   srpol = 2;
     987         156 :   scpol = 2;
     988         156 :   for (j=0;j<nintv;j++) {
     989         130 :     ppol[j*ld] = 1.0;
     990         130 :     rpol[j*ld] = 1.0;
     991         130 :     cpol[j*ld] = 1.0;
     992             :   }
     993             :   /* ppol is the initial p-polynomial (corresponding to the A-conjugate vector p in CG)
     994             :      rpol is the r-polynomial (corresponding to the residual vector r in CG)
     995             :      cpol is the "corrected" residual polynomial */
     996          26 :   sappol = 3;
     997          26 :   sarpol = 3;
     998          26 :   PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(ppol,sppol,ld,intv,nintv,appol,ld));
     999         494 :   for (i=0;i<3;i++) for (j=0;j<nintv;j++) arpol[i+j*ld] = appol[i+j*ld];
    1000          26 :   rho00 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
    1001          26 :   rho = rho00;
    1002             : 
    1003             :   /* corrected CR in vector space */
    1004             :   /* we assume x0 is always 0 */
    1005          26 :   PetscCall(MatZeroEntries(C));
    1006          26 :   PetscCall(MatCopy(B,R,SAME_NONZERO_PATTERN));     /* initial residual r = b-A*x0 */
    1007          26 :   PetscCall(MatCopy(R,P,SAME_NONZERO_PATTERN));     /* p = r */
    1008          26 :   PetscCall(MatMatMult(A,P,MAT_REUSE_MATRIX,PETSC_DEFAULT,&AP));  /* ap = A*p */
    1009             : 
    1010        5226 :   for (i=0;i<niter;i++) {
    1011             :     /* iteration in the polynomial space */
    1012        5200 :     den = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(appol,sappol,nintv,ld,appol,sappol,nintv,ld,intervalWeights);
    1013        5200 :     alp0 = rho/den;
    1014        5200 :     rho1 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(baseFilter,nbase,nintv,nbase,appol,sappol,nintv,ld,intervalWeights);
    1015        5200 :     alp  = (rho-rho1)/den;
    1016        5200 :     srpol++;
    1017        5200 :     scpol++;
    1018        5200 :     PetscCall(Mat_AXPY_BLAS(srpol,nintv,-alp0,appol,ld,1.0,rpol,ld));
    1019        5200 :     PetscCall(Mat_AXPY_BLAS(scpol,nintv,-alp,appol,ld,1.0,cpol,ld));
    1020        5200 :     sarpol++;
    1021        5200 :     PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(rpol,srpol,ld,intv,nintv,arpol,ld));
    1022        5200 :     rho0 = rho;
    1023        5200 :     rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
    1024             : 
    1025             :     /* update x in the vector space */
    1026        5200 :     alpha = alp;
    1027        5200 :     PetscCall(MatAXPY(C,alpha,P,SAME_NONZERO_PATTERN));   /* x += alp*p */
    1028        5200 :     if (rho < tol*rho00) break;
    1029             : 
    1030             :     /* finish the iteration in the polynomial space */
    1031        5200 :     bet = rho / rho0;
    1032        5200 :     sppol++;
    1033        5200 :     sappol++;
    1034        5200 :     PetscCall(Mat_AXPY_BLAS(sppol,nintv,1.0,rpol,ld,bet,ppol,ld));
    1035        5200 :     PetscCall(Mat_AXPY_BLAS(sappol,nintv,1.0,arpol,ld,bet,appol,ld));
    1036             : 
    1037             :     /* finish the iteration in the vector space */
    1038        5200 :     alpha = -alp0;
    1039        5200 :     PetscCall(MatAXPY(R,alpha,AP,SAME_NONZERO_PATTERN));    /* r -= alp0*ap */
    1040        5200 :     alpha = bet;
    1041        5200 :     PetscCall(MatAYPX(P,alpha,R,SAME_NONZERO_PATTERN));     /* p = r + bet*p */
    1042        5200 :     PetscCall(MatMatMult(A,R,MAT_REUSE_MATRIX,PETSC_DEFAULT,&W));         /* ap = A*r + bet*ap */
    1043        5200 :     PetscCall(MatAYPX(AP,alpha,W,SAME_NONZERO_PATTERN));
    1044             :   }
    1045          26 :   PetscCall(PetscFree5(ppol,rpol,cpol,appol,arpol));
    1046          26 :   PetscFunctionReturn(PETSC_SUCCESS);
    1047             : }
    1048             : 
    1049             : /*
    1050             :    Gateway to FILTLAN for evaluating C=p(A)*B
    1051             : */
    1052          26 : static PetscErrorCode MatMatMult_FILTLAN(Mat A,Mat B,Mat C,void *pctx)
    1053             : {
    1054          26 :   ST             st;
    1055          26 :   ST_FILTER      *ctx;
    1056          26 :   PetscInt       i,m1,m2,npoints;
    1057             : 
    1058          26 :   PetscFunctionBegin;
    1059          26 :   PetscCall(MatShellGetContext(A,&st));
    1060          26 :   ctx = (ST_FILTER*)st->data;
    1061          26 :   npoints = (ctx->filterInfo->filterType == 2)? 6: 4;
    1062          26 :   if (ctx->nW) {  /* check if work matrices must be resized */
    1063          25 :     PetscCall(MatGetSize(B,NULL,&m1));
    1064          25 :     PetscCall(MatGetSize(ctx->W[0],NULL,&m2));
    1065          25 :     if (m1!=m2) {
    1066           3 :       PetscCall(MatDestroyMatrices(ctx->nW,&ctx->W));
    1067           3 :       ctx->nW = 0;
    1068             :     }
    1069             :   }
    1070          26 :   if (!ctx->nW) {  /* allocate work matrices */
    1071           4 :     ctx->nW = 4;
    1072           4 :     PetscCall(PetscMalloc1(ctx->nW,&ctx->W));
    1073          20 :     for (i=0;i<ctx->nW;i++) PetscCall(MatDuplicate(B,MAT_DO_NOT_COPY_VALUES,&ctx->W[i]));
    1074             :   }
    1075          26 :   PetscCall(FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProductBlock(ctx->T,B,ctx->W[0],ctx->baseFilter,2*ctx->baseDegree+2,ctx->intervals,npoints-1,ctx->opts->intervalWeights,ctx->polyDegree,st->work,C,ctx->W[1],ctx->W[2],ctx->W[3]));
    1076          26 :   PetscCall(MatMatMult(ctx->T,ctx->W[0],MAT_REUSE_MATRIX,PETSC_DEFAULT,&C));
    1077          26 :   PetscFunctionReturn(PETSC_SUCCESS);
    1078             : }
    1079             : 
    1080             : /*
    1081             :    FILTLAN function PolynomialFilterInterface::setFilter
    1082             : 
    1083             :    Creates the shifted (and scaled) matrix and the base filter P(z).
    1084             :    M is a shell matrix whose MatMult() applies the filter.
    1085             : */
    1086           8 : PetscErrorCode STFilter_FILTLAN_setFilter(ST st,Mat *G)
    1087             : {
    1088           8 :   ST_FILTER      *ctx = (ST_FILTER*)st->data;
    1089           8 :   PetscInt       i,npoints,n,m,N,M;
    1090           8 :   PetscReal      frame2[4];
    1091           8 :   PetscScalar    alpha;
    1092           8 :   const PetscInt HighLowFlags[5] = { 1, -1, 0, -1, 1 };
    1093             : 
    1094           8 :   PetscFunctionBegin;
    1095           8 :   if (ctx->frame[0] == ctx->frame[1]) {  /* low pass filter, convert it to high pass filter */
    1096             :     /* T = frame[3]*eye(n) - A */
    1097           0 :     PetscCall(MatDestroy(&ctx->T));
    1098           0 :     PetscCall(MatDuplicate(st->A[0],MAT_COPY_VALUES,&ctx->T));
    1099           0 :     PetscCall(MatScale(ctx->T,-1.0));
    1100           0 :     alpha = ctx->frame[3];
    1101           0 :     PetscCall(MatShift(ctx->T,alpha));
    1102           0 :     for (i=0;i<4;i++) frame2[i] = ctx->frame[3] - ctx->frame[3-i];
    1103             :   } else {  /* it can be a mid-pass filter or a high-pass filter */
    1104           8 :     if (ctx->frame[0] == 0.0) {
    1105           0 :       PetscCall(PetscObjectReference((PetscObject)st->A[0]));
    1106           0 :       PetscCall(MatDestroy(&ctx->T));
    1107           0 :       ctx->T = st->A[0];
    1108           0 :       for (i=0;i<4;i++) frame2[i] = ctx->frame[i];
    1109             :     } else {
    1110             :       /* T = A - frame[0]*eye(n) */
    1111           8 :       PetscCall(MatDestroy(&ctx->T));
    1112           8 :       PetscCall(MatDuplicate(st->A[0],MAT_COPY_VALUES,&ctx->T));
    1113           8 :       alpha = -ctx->frame[0];
    1114           8 :       PetscCall(MatShift(ctx->T,alpha));
    1115          40 :       for (i=0;i<4;i++) frame2[i] = ctx->frame[i] - ctx->frame[0];
    1116             :     }
    1117             :   }
    1118             : 
    1119             :   /* no need to recompute filter if the parameters did not change */
    1120           8 :   if (st->state==ST_STATE_INITIAL || ctx->filtch) {
    1121           7 :     PetscCall(FILTLAN_GetIntervals(ctx->intervals,frame2,ctx->polyDegree,ctx->baseDegree,ctx->opts,ctx->filterInfo));
    1122             :     /* translate the intervals back */
    1123           7 :     if (ctx->frame[0] == ctx->frame[1]) {  /* low pass filter, convert it to high pass filter */
    1124           0 :       for (i=0;i<4;i++) ctx->intervals2[i] = ctx->frame[3] - ctx->intervals[3-i];
    1125             :     } else {  /* it can be a mid-pass filter or a high-pass filter */
    1126           7 :       if (ctx->frame[0] == 0.0) {
    1127           0 :         for (i=0;i<6;i++) ctx->intervals2[i] = ctx->intervals[i];
    1128             :       } else {
    1129          49 :         for (i=0;i<6;i++) ctx->intervals2[i] = ctx->intervals[i] + ctx->frame[0];
    1130             :       }
    1131             :     }
    1132             : 
    1133           7 :     npoints = (ctx->filterInfo->filterType == 2)? 6: 4;
    1134           7 :     PetscCall(PetscFree(ctx->baseFilter));
    1135           7 :     PetscCall(PetscMalloc1((2*ctx->baseDegree+2)*(npoints-1),&ctx->baseFilter));
    1136           7 :     PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(ctx->baseFilter,ctx->intervals,npoints,HighLowFlags,ctx->baseDegree));
    1137           7 :     PetscCall(PetscInfo(st,"Computed value of yLimit = %g\n",(double)ctx->filterInfo->yLimit));
    1138             :   }
    1139           8 :   ctx->filtch = PETSC_FALSE;
    1140             : 
    1141             :   /* create shell matrix*/
    1142           8 :   if (!*G) {
    1143           7 :     PetscCall(MatGetSize(ctx->T,&N,&M));
    1144           7 :     PetscCall(MatGetLocalSize(ctx->T,&n,&m));
    1145           7 :     PetscCall(MatCreateShell(PetscObjectComm((PetscObject)st),n,m,N,M,st,G));
    1146           7 :     PetscCall(MatShellSetOperation(*G,MATOP_MULT,(void(*)(void))MatMult_FILTLAN));
    1147           7 :     PetscCall(MatShellSetMatProductOperation(*G,MATPRODUCT_AB,NULL,MatMatMult_FILTLAN,NULL,MATDENSE,MATDENSE));
    1148           7 :     PetscCall(MatShellSetMatProductOperation(*G,MATPRODUCT_AB,NULL,MatMatMult_FILTLAN,NULL,MATDENSECUDA,MATDENSECUDA));
    1149           7 :     PetscCall(MatShellSetMatProductOperation(*G,MATPRODUCT_AB,NULL,MatMatMult_FILTLAN,NULL,MATDENSEHIP,MATDENSEHIP));
    1150             :   }
    1151           8 :   PetscFunctionReturn(PETSC_SUCCESS);
    1152             : }

Generated by: LCOV version 1.14