Actual source code: filtlan.c

slepc-3.21.1 2024-04-26
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */
 10: /*
 11:    This file is an adaptation of several subroutines from FILTLAN, the
 12:    Filtered Lanczos Package, authored by Haw-ren Fang and Yousef Saad.

 14:    More information at:
 15:    https://www-users.cs.umn.edu/~saad/software/filtlan

 17:    References:

 19:        [1] H. Fang and Y. Saad, "A filtered Lanczos procedure for extreme and interior
 20:            eigenvalue problems", SIAM J. Sci. Comput. 34(4):A2220-A2246, 2012.
 21: */

 23: #include <slepc/private/stimpl.h>
 24: #include "filter.h"

 26: static PetscErrorCode FILTLAN_FilteredConjugateResidualPolynomial(PetscReal*,PetscReal*,PetscInt,PetscReal*,PetscInt,PetscReal*,PetscInt);
 27: static PetscReal FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(PetscReal*,PetscInt,PetscReal*,PetscInt,PetscReal);
 28: static PetscErrorCode FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(PetscInt,PetscReal,PetscReal,PetscReal*,PetscReal*,PetscReal*,PetscReal*);

 30: /* ////////////////////////////////////////////////////////////////////////////
 31:    //    Newton - Hermite Polynomial Interpolation
 32:    //////////////////////////////////////////////////////////////////////////// */

 34: /*
 35:    FILTLAN function NewtonPolynomial

 37:    build P(z) by Newton's divided differences in the form
 38:        P(z) = a(1) + a(2)*(z-x(1)) + a(3)*(z-x(1))*(z-x(2)) + ... + a(n)*(z-x(1))*...*(z-x(n-1)),
 39:    such that P(x(i)) = y(i) for i=1,...,n, where
 40:        x,y are input vectors of length n, and a is the output vector of length n
 41:    if x(i)==x(j) for some i!=j, then it is assumed that the derivative of P(z) is to be zero at x(i),
 42:        and the Hermite polynomial interpolation is applied
 43:    in general, if there are k x(i)'s with the same value x0, then
 44:        the j-th order derivative of P(z) is zero at z=x0 for j=1,...,k-1
 45: */
 46: static inline PetscErrorCode FILTLAN_NewtonPolynomial(PetscInt n,PetscReal *x,PetscReal *y,PetscReal *sa,PetscReal *sf)
 47: {
 48:   PetscReal      d,*sx=x,*sy=y;
 49:   PetscInt       j,k;

 51:   PetscFunctionBegin;
 52:   PetscCall(PetscArraycpy(sf,sy,n));

 54:   /* apply Newton's finite difference method */
 55:   sa[0] = sf[0];
 56:   for (j=1;j<n;j++) {
 57:     for (k=n-1;k>=j;k--) {
 58:       d = sx[k]-sx[k-j];
 59:       if (PetscUnlikely(d == 0.0)) sf[k] = 0.0;  /* assume that the derivative is 0.0 and apply the Hermite interpolation */
 60:       else sf[k] = (sf[k]-sf[k-1]) / d;
 61:     }
 62:     sa[j] = sf[j];
 63:   }
 64:   PetscFunctionReturn(PETSC_SUCCESS);
 65: }

 67: /*
 68:    FILTLAN function HermiteBaseFilterInChebyshevBasis

 70:    compute a base filter P(z) which is a continuous, piecewise polynomial P(z) expanded
 71:    in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials in each interval

 73:    The base filter P(z) equals P_j(z) for z in the j-th interval [intv(j), intv(j+1)), where
 74:    P_j(z) a Hermite interpolating polynomial

 76:    input:
 77:    intv is a vector which defines the intervals; the j-th interval is [intv(j), intv(j+1))
 78:    HiLowFlags determines the shape of the base filter P(z)
 79:    Consider the j-th interval [intv(j), intv(j+1)]
 80:    HighLowFlag[j-1]==1,  P(z)==1 for z in [intv(j), intv(j+1)]
 81:                    ==0,  P(z)==0 for z in [intv(j), intv(j+1)]
 82:                    ==-1, [intv(j), intv(j+1)] is a transition interval;
 83:                          P(intv(j)) and P(intv(j+1)) are defined such that P(z) is continuous
 84:    baseDeg is the degree of smoothness of the Hermite (piecewise polynomial) interpolation
 85:    to be precise, the i-th derivative of P(z) is zero, i.e. d^{i}P(z)/dz^i==0, at all interval
 86:    end points z=intv(j) for i=1,...,baseDeg

 88:    output:
 89:    P(z) expanded in a basis of `translated' (scale-and-shift) Chebyshev polynomials
 90:    to be precise, for z in the j-th interval [intv(j),intv(j+1)), P(z) equals
 91:        P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
 92:    where S_i(z) is the `translated' Chebyshev polynomial in that interval,
 93:        S_i((z-c)/h) = T_i(z),  c = (intv(j)+intv(j+1))) / 2,  h = (intv(j+1)-intv(j)) / 2,
 94:    with T_i(z) the Chebyshev polynomial of the first kind,
 95:        T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
 96:    the return matrix is the matrix of Chebyshev coefficients pp just described

 98:    note that the degree of P(z) in each interval is (at most) 2*baseDeg+1, with 2*baseDeg+2 coefficients
 99:    let n be the length of intv; then there are n-1 intervals
100:    therefore the return matrix pp is of size (2*baseDeg+2)-by-(n-1)
101: */
102: static PetscErrorCode FILTLAN_HermiteBaseFilterInChebyshevBasis(PetscReal *baseFilter,PetscReal *intv,PetscInt npoints,const PetscInt *HighLowFlags,PetscInt baseDeg)
103: {
104:   PetscInt       m,ii,jj;
105:   PetscReal      flag,flag0,flag2,aa,bb,*px,*py,*sx,*sy,*pp,*qq,*sq,*sbf,*work,*currentPoint = intv;
106:   const PetscInt *hilo = HighLowFlags;

108:   PetscFunctionBegin;
109:   m = 2*baseDeg+2;
110:   jj = npoints-1;  /* jj is initialized as the number of intervals */
111:   PetscCall(PetscMalloc5(m,&px,m,&py,m,&pp,m,&qq,m,&work));
112:   sbf = baseFilter;

114:   while (jj--) {  /* the main loop to compute the Chebyshev coefficients */

116:     flag = (PetscReal)(*hilo++);  /* get the flag of the current interval */
117:     if (flag == -1.0) {  /* flag == -1 means that the current interval is a transition polynomial */

119:       flag2 = (PetscReal)(*hilo);  /* get flag2, the flag of the next interval */
120:       flag0 = 1.0-flag2;       /* the flag of the previous interval is 1-flag2 */

122:       /* two pointers for traversing x[] and y[] */
123:       sx = px;
124:       sy = py;

126:       /* find the current interval [aa,bb] */
127:       aa = *currentPoint++;
128:       bb = *currentPoint;

130:       /* now left-hand side */
131:       ii = baseDeg+1;
132:       while (ii--) {
133:         *sy++ = flag0;
134:         *sx++ = aa;
135:       }

137:       /* now right-hand side */
138:       ii = baseDeg+1;
139:       while (ii--) {
140:         *sy++ = flag2;
141:         *sx++ = bb;
142:       }

144:       /* build a Newton polynomial (indeed, the generalized Hermite interpolating polynomial) with x[] and y[] */
145:       PetscCall(FILTLAN_NewtonPolynomial(m,px,py,pp,work));

147:       /* pp contains coefficients of the Newton polynomial P(z) in the current interval [aa,bb], where
148:          P(z) = pp(1) + pp(2)*(z-px(1)) + pp(3)*(z-px(1))*(z-px(2)) + ... + pp(n)*(z-px(1))*...*(z-px(n-1)) */

150:       /* translate the Newton coefficients to the Chebyshev coefficients */
151:       PetscCall(FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(m,aa,bb,pp,px,qq,work));
152:       /* qq contains coefficients of the polynomial in [aa,bb] in the `translated' Chebyshev basis */

154:       /* copy the Chebyshev coefficients to baseFilter
155:          OCTAVE/MATLAB: B(:,j) = qq, where j = (npoints-1)-jj and B is the return matrix */
156:       sq = qq;
157:       ii = 2*baseDeg+2;
158:       while (ii--) *sbf++ = *sq++;

160:     } else {

162:       /* a constant polynomial P(z)=flag, where either flag==0 or flag==1
163:        OCTAVE/MATLAB: B(1,j) = flag, where j = (npoints-1)-jj and B is the return matrix */
164:       *sbf++ = flag;

166:       /* the other coefficients are all zero, since P(z) is a constant
167:        OCTAVE/MATLAB: B(1,j) = 0, where j = (npoints-1)-jj and B is the return matrix */
168:       ii = 2*baseDeg+1;
169:       while (ii--) *sbf++ = 0.0;

171:       /* for the next point */
172:       currentPoint++;
173:     }
174:   }
175:   PetscCall(PetscFree5(px,py,pp,qq,work));
176:   PetscFunctionReturn(PETSC_SUCCESS);
177: }

179: /* ////////////////////////////////////////////////////////////////////////////
180:    //    Base Filter
181:    //////////////////////////////////////////////////////////////////////////// */

183: /*
184:    FILTLAN function GetIntervals

186:    this routine determines the intervals (including the transition one(s)) by an iterative process

188:    frame is a vector consisting of 4 ordered elements:
189:        [frame(1),frame(4)] is the interval which (tightly) contains all eigenvalues, and
190:        [frame(2),frame(3)] is the interval in which the eigenvalues are sought
191:    baseDeg is the left-and-right degree of the base filter for each interval
192:    polyDeg is the (maximum possible) degree of s(z), with z*s(z) the polynomial filter
193:    intv is the output; the j-th interval is [intv(j),intv(j+1))
194:    opts is a collection of interval options

196:    the base filter P(z) is a piecewise polynomial from Hermite interpolation with degree baseDeg
197:    at each end point of intervals

199:    the polynomial filter Q(z) is in the form z*s(z), i.e. Q(0)==0, such that ||(1-P(z))-z*s(z)||_w is
200:    minimized with s(z) a polynomial of degree up to polyDeg

202:    the resulting polynomial filter Q(z) satisfies Q(x)>=Q(y) for x in [frame[1],frame[2]], and
203:    y in [frame[0],frame[3]] but not in [frame[1],frame[2]]

205:    the routine fills a PolynomialFilterInfo struct which gives some information of the polynomial filter
206: */
207: static PetscErrorCode FILTLAN_GetIntervals(PetscReal *intervals,PetscReal *frame,PetscInt polyDeg,PetscInt baseDeg,FILTLAN_IOP opts,FILTLAN_PFI filterInfo)
208: {
209:   PetscReal       intv[6],x,y,z1,z2,c,c1,c2,fc,fc2,halfPlateau,leftDelta,rightDelta,gridSize;
210:   PetscReal       yLimit,ySummit,yLeftLimit,yRightLimit,bottom,qIndex,*baseFilter,*polyFilter;
211:   PetscReal       yLimitGap=0.0,yLeftSummit=0.0,yLeftBottom=0.0,yRightSummit=0.0,yRightBottom=0.0;
212:   PetscInt        i,ii,npoints,numIter,numLeftSteps=1,numRightSteps=1,numMoreLooked=0;
213:   PetscBool       leftBoundaryMet=PETSC_FALSE,rightBoundaryMet=PETSC_FALSE,stepLeft,stepRight;
214:   const PetscReal a=frame[0],a1=frame[1],b1=frame[2],b=frame[3];
215:   const PetscInt  HighLowFlags[5] = { 1, -1, 0, -1, 1 };  /* if filterType is 1, only first 3 elements will be used */
216:   const PetscInt  numLookMore = 2*(PetscInt)(0.5+(PetscLogReal(2.0)/PetscLogReal(opts->shiftStepExpanRate)));

218:   PetscFunctionBegin;
219:   PetscCheck(a<=a1 && a1<=b1 && b1<=b,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Values in the frame vector should be non-decreasing");
220:   PetscCheck(a1!=b1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"The range of wanted eigenvalues cannot be of size zero");
221:   filterInfo->filterType = 2;      /* mid-pass filter, for interior eigenvalues */
222:   if (b == b1) {
223:     PetscCheck(a!=a1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"A polynomial filter should not cover all eigenvalues");
224:     filterInfo->filterType = 1;    /* high-pass filter, for largest eigenvalues */
225:   } else PetscCheck(a!=a1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"filterType==3 for smallest eigenvalues should be pre-converted to filterType==1 for largest eigenvalues");

227:   /* the following recipe follows Yousef Saad (2005, 2006) with a few minor adaptations / enhancements */
228:   halfPlateau = 0.5*(b1-a1)*opts->initialPlateau;    /* half length of the "plateau" of the (dual) base filter */
229:   leftDelta = (b1-a1)*opts->initialShiftStep;        /* initial left shift */
230:   rightDelta = leftDelta;                            /* initial right shift */
231:   opts->numGridPoints = PetscMax(opts->numGridPoints,(PetscInt)(2.0*(b-a)/halfPlateau));
232:   gridSize = (b-a) / (PetscReal)opts->numGridPoints;

234:   for (i=0;i<6;i++) intv[i] = 0.0;
235:   if (filterInfo->filterType == 2) {  /* for interior eigenvalues */
236:     npoints = 6;
237:     intv[0] = a;
238:     intv[5] = b;
239:     /* intv[1], intv[2], intv[3], and intv[4] to be determined */
240:   } else { /* filterType == 1 (or 3 with conversion), for extreme eigenvalues */
241:     npoints = 4;
242:     intv[0] = a;
243:     intv[3] = b;
244:     /* intv[1], and intv[2] to be determined */
245:   }
246:   z1 = a1 - leftDelta;
247:   z2 = b1 + rightDelta;
248:   filterInfo->filterOK = 0;  /* not yet found any OK filter */

250:   /* allocate matrices */
251:   PetscCall(PetscMalloc2((polyDeg+2)*(npoints-1),&polyFilter,(2*baseDeg+2)*(npoints-1),&baseFilter));

253:   /* initialize the intervals, mainly for the case opts->maxOuterIter == 0 */
254:   intervals[0] = intv[0];
255:   intervals[3] = intv[3];
256:   intervals[5] = intv[5];
257:   intervals[1] = z1;
258:   if (filterInfo->filterType == 2) {  /* a mid-pass filter for interior eigenvalues */
259:     intervals[4] = z2;
260:     c = (a1+b1) / 2.0;
261:     intervals[2] = c - halfPlateau;
262:     intervals[3] = c + halfPlateau;
263:   } else {  /* filterType == 1 (or 3 with conversion) for extreme eigenvalues */
264:     intervals[2] = z1 + (b1-z1)*opts->transIntervalRatio;
265:   }

267:   /* the main loop */
268:   for (numIter=1;numIter<=opts->maxOuterIter;numIter++) {
269:     if (z1 <= a) {  /* outer loop updates z1 and z2 */
270:       z1 = a;
271:       leftBoundaryMet = PETSC_TRUE;
272:     }
273:     if (filterInfo->filterType == 2) {  /* a <= z1 < (a1) */
274:       if (z2 >= b) {  /* a mid-pass filter for interior eigenvalues */
275:         z2 = b;
276:         rightBoundaryMet = PETSC_TRUE;
277:       }
278:       /* a <= z1 < c-h < c+h < z2 <= b, where h is halfPlateau */
279:       /* [z1, c-h] and [c+h, z2] are transition interval */
280:       intv[1] = z1;
281:       intv[4] = z2;
282:       c1 = z1 + halfPlateau;
283:       intv[2] = z1;           /* i.e. c1 - halfPlateau */
284:       intv[3] = c1 + halfPlateau;
285:       PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg));
286:       PetscCall(FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg));
287:       /* fc1 = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1); */
288:       c2 = z2 - halfPlateau;
289:       intv[2] = c2 - halfPlateau;
290:       intv[3] = z2;           /* i.e. c2 + halfPlateau */
291:       PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg));
292:       PetscCall(FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg));
293:       fc2 = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1);
294:       yLimitGap = PETSC_MAX_REAL;
295:       ii = opts->maxInnerIter;
296:       while (ii-- && !(yLimitGap <= opts->yLimitTol)) {
297:         /* recursive bisection to get c such that p(a1) are p(b1) approximately the same */
298:         c = (c1+c2) / 2.0;
299:         intv[2] = c - halfPlateau;
300:         intv[3] = c + halfPlateau;
301:         PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg));
302:         PetscCall(FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg));
303:         fc = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1);
304:         if (fc*fc2 < 0.0) {
305:           c1 = c;
306:           /* fc1 = fc; */
307:         } else {
308:           c2 = c;
309:           fc2 = fc;
310:         }
311:         yLimitGap = PetscAbsReal(fc);
312:       }
313:     } else {  /* filterType == 1 (or 3 with conversion) for extreme eigenvalues */
314:       intv[1] = z1;
315:       intv[2] = z1 + (b1-z1)*opts->transIntervalRatio;
316:       PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,4,HighLowFlags,baseDeg));
317:       PetscCall(FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,4,opts->intervalWeights,polyDeg));
318:     }
319:     /* polyFilter contains the coefficients of the polynomial filter which approximates phi(x)
320:        expanded in the `translated' Chebyshev basis */
321:     /* psi(x) = 1.0 - phi(x) is the dual base filter approximated by a polynomial in the form x*p(x) */
322:     yLeftLimit  = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1);
323:     yRightLimit = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1);
324:     yLimit  = (yLeftLimit < yRightLimit) ? yLeftLimit : yRightLimit;
325:     ySummit = (yLeftLimit > yRightLimit) ? yLeftLimit : yRightLimit;
326:     x = a1;
327:     while ((x+=gridSize) < b1) {
328:       y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
329:       if (y < yLimit)  yLimit  = y;
330:       if (y > ySummit) ySummit = y;
331:     }
332:     /* now yLimit is the minimum of x*p(x) for x in [a1, b1] */
333:     stepLeft  = PETSC_FALSE;
334:     stepRight = PETSC_FALSE;
335:     if ((yLimit < yLeftLimit && yLimit < yRightLimit) || yLimit < opts->yBottomLine) {
336:       /* very bad, step to see what will happen */
337:       stepLeft = PETSC_TRUE;
338:       if (filterInfo->filterType == 2) stepRight = PETSC_TRUE;
339:     } else if (filterInfo->filterType == 2) {
340:       if (yLeftLimit < yRightLimit) {
341:         if (yRightLimit-yLeftLimit > opts->yLimitTol) stepLeft = PETSC_TRUE;
342:       } else if (yLeftLimit-yRightLimit > opts->yLimitTol) stepRight = PETSC_TRUE;
343:     }
344:     if (!stepLeft) {
345:       yLeftBottom = yLeftLimit;
346:       x = a1;
347:       while ((x-=gridSize) >= a) {
348:         y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
349:         if (y < yLeftBottom) yLeftBottom = y;
350:         else if (y > yLeftBottom) break;
351:       }
352:       yLeftSummit = yLeftBottom;
353:       while ((x-=gridSize) >= a) {
354:         y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
355:         if (y > yLeftSummit) {
356:           yLeftSummit = y;
357:           if (yLeftSummit > yLimit*opts->yRippleLimit) {
358:             stepLeft = PETSC_TRUE;
359:             break;
360:           }
361:         }
362:         if (y < yLeftBottom) yLeftBottom = y;
363:       }
364:     }
365:     if (filterInfo->filterType == 2 && !stepRight) {
366:       yRightBottom = yRightLimit;
367:       x = b1;
368:       while ((x+=gridSize) <= b) {
369:         y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
370:         if (y < yRightBottom) yRightBottom = y;
371:         else if (y > yRightBottom) break;
372:       }
373:       yRightSummit = yRightBottom;
374:       while ((x+=gridSize) <= b) {
375:         y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
376:         if (y > yRightSummit) {
377:           yRightSummit = y;
378:           if (yRightSummit > yLimit*opts->yRippleLimit) {
379:             stepRight = PETSC_TRUE;
380:             break;
381:           }
382:         }
383:         if (y < yRightBottom) yRightBottom = y;
384:       }
385:     }
386:     if (!stepLeft && !stepRight) {
387:       if (filterInfo->filterType == 2) bottom = PetscMin(yLeftBottom,yRightBottom);
388:       else bottom = yLeftBottom;
389:       qIndex = 1.0 - (yLimit-bottom) / (ySummit-bottom);
390:       if (filterInfo->filterOK == 0 || filterInfo->filterQualityIndex < qIndex) {
391:         /* found the first OK filter or a better filter */
392:         for (i=0;i<6;i++) intervals[i] = intv[i];
393:         filterInfo->filterOK           = 1;
394:         filterInfo->filterQualityIndex = qIndex;
395:         filterInfo->numIter            = numIter;
396:         filterInfo->yLimit             = yLimit;
397:         filterInfo->ySummit            = ySummit;
398:         filterInfo->numLeftSteps       = numLeftSteps;
399:         filterInfo->yLeftSummit        = yLeftSummit;
400:         filterInfo->yLeftBottom        = yLeftBottom;
401:         if (filterInfo->filterType == 2) {
402:           filterInfo->yLimitGap        = yLimitGap;
403:           filterInfo->numRightSteps    = numRightSteps;
404:           filterInfo->yRightSummit     = yRightSummit;
405:           filterInfo->yRightBottom     = yRightBottom;
406:         }
407:         numMoreLooked = 0;
408:       } else if (++numMoreLooked == numLookMore) {
409:         /* filter has been optimal */
410:         filterInfo->filterOK = 2;
411:         break;
412:       }
413:       /* try stepping further to see whether it can improve */
414:       stepLeft = PETSC_TRUE;
415:       if (filterInfo->filterType == 2) stepRight = PETSC_TRUE;
416:     }
417:     /* check whether we can really "step" */
418:     if (leftBoundaryMet) {
419:       if (filterInfo->filterType == 1 || rightBoundaryMet) break;  /* cannot step further, so break the loop */
420:       if (stepLeft) {
421:         /* cannot step left, so try stepping right */
422:         stepLeft  = PETSC_FALSE;
423:         stepRight = PETSC_TRUE;
424:       }
425:     }
426:     if (rightBoundaryMet && stepRight) {
427:       /* cannot step right, so try stepping left */
428:       stepRight = PETSC_FALSE;
429:       stepLeft  = PETSC_TRUE;
430:     }
431:     /* now "step" */
432:     if (stepLeft) {
433:       numLeftSteps++;
434:       if (filterInfo->filterType == 2) leftDelta *= opts->shiftStepExpanRate; /* expand the step for faster convergence */
435:       z1 -= leftDelta;
436:     }
437:     if (stepRight) {
438:       numRightSteps++;
439:       rightDelta *= opts->shiftStepExpanRate;  /* expand the step for faster convergence */
440:       z2 += rightDelta;
441:     }
442:     if (filterInfo->filterType == 2) {
443:       /* shrink the "plateau" of the (dual) base filter */
444:       if (stepLeft && stepRight) halfPlateau /= opts->plateauShrinkRate;
445:       else halfPlateau /= PetscSqrtReal(opts->plateauShrinkRate);
446:     }
447:   }
448:   PetscCheck(filterInfo->filterOK,PETSC_COMM_SELF,PETSC_ERR_CONV_FAILED,"STFILTER cannot get the filter specified; please adjust your filter parameters (e.g. increasing the polynomial degree)");

450:   filterInfo->totalNumIter = numIter;
451:   PetscCall(PetscFree2(polyFilter,baseFilter));
452:   PetscFunctionReturn(PETSC_SUCCESS);
453: }

455: /* ////////////////////////////////////////////////////////////////////////////
456:    //    Chebyshev Polynomials
457:    //////////////////////////////////////////////////////////////////////////// */

459: /*
460:    FILTLAN function ExpandNewtonPolynomialInChebyshevBasis

462:    translate the coefficients of a Newton polynomial to the coefficients
463:    in a basis of the `translated' (scale-and-shift) Chebyshev polynomials

465:    input:
466:    a Newton polynomial defined by vectors a and x:
467:        P(z) = a(1) + a(2)*(z-x(1)) + a(3)*(z-x(1))*(z-x(2)) + ... + a(n)*(z-x(1))*...*(z-x(n-1))
468:    the interval [aa,bb] defines the `translated' Chebyshev polynomials S_i(z) = T_i((z-c)/h),
469:        where c=(aa+bb)/2 and h=(bb-aa)/2, and T_i is the Chebyshev polynomial of the first kind
470:    note that T_i is defined by T_0(z)=1, T_1(z)=z, and T_i(z)=2*z*T_{i-1}(z)+T_{i-2}(z) for i>=2

472:    output:
473:    a vector q containing the Chebyshev coefficients:
474:        P(z) = q(1)*S_0(z) + q(2)*S_1(z) + ... + q(n)*S_{n-1}(z)
475: */
476: static inline PetscErrorCode FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(PetscInt n,PetscReal aa,PetscReal bb,PetscReal *a,PetscReal *x,PetscReal *q,PetscReal *q2)
477: {
478:   PetscInt  m,mm;
479:   PetscReal *sa,*sx,*sq,*sq2,c,c2,h,h2;

481:   PetscFunctionBegin;
482:   sa = a+n;    /* pointers for traversing a and x */
483:   sx = x+n-1;
484:   *q = *--sa;  /* set q[0] = a(n) */

486:   c = (aa+bb)/2.0;
487:   h = (bb-aa)/2.0;
488:   h2 = h/2.0;

490:   for (m=1;m<=n-1;m++) {  /* the main loop for translation */

492:     /* compute q2[0:m-1] = (c-x[n-m-1])*q[0:m-1] */
493:     mm = m;
494:     sq = q;
495:     sq2 = q2;
496:     c2 = c-(*--sx);
497:     while (mm--) *(sq2++) = c2*(*sq++);
498:     *sq2 = 0.0;         /* q2[m] = 0.0 */
499:     *(q2+1) += h*(*q);  /* q2[1] = q2[1] + h*q[0] */

501:     /* compute q2[0:m-2] = q2[0:m-2] + h2*q[1:m-1] */
502:     mm = m-1;
503:     sq2 = q2;
504:     sq = q+1;
505:     while (mm--) *(sq2++) += h2*(*sq++);

507:     /* compute q2[2:m] = q2[2:m] + h2*q[1:m-1] */
508:     mm = m-1;
509:     sq2 = q2+2;
510:     sq = q+1;
511:     while (mm--) *(sq2++) += h2*(*sq++);

513:     /* compute q[0:m] = q2[0:m] */
514:     mm = m+1;
515:     sq2 = q2;
516:     sq = q;
517:     while (mm--) *sq++ = *sq2++;
518:     *q += (*--sa);      /* q[0] = q[0] + p[n-m-1] */
519:   }
520:   PetscFunctionReturn(PETSC_SUCCESS);
521: }

523: /*
524:    FILTLAN function PolynomialEvaluationInChebyshevBasis

526:    evaluate P(z) at z=z0, where P(z) is a polynomial expanded in a basis of
527:    the `translated' (i.e. scale-and-shift) Chebyshev polynomials

529:    input:
530:    c is a vector of Chebyshev coefficients which defines the polynomial
531:        P(z) = c(1)*S_0(z) + c(2)*S_1(z) + ... + c(n)*S_{n-1}(z),
532:    where S_i is the `translated' Chebyshev polynomial S_i((z-c)/h) = T_i(z), with
533:        c = (intv(j)+intv(j+1)) / 2,  h = (intv(j+1)-intv(j)) / 2
534:    note that T_i(z) is the Chebyshev polynomial of the first kind,
535:        T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2

537:    output:
538:    the evaluated value of P(z) at z=z0
539: */
540: static inline PetscReal FILTLAN_PolynomialEvaluationInChebyshevBasis(PetscReal *pp,PetscInt m,PetscInt idx,PetscReal z0,PetscReal aa,PetscReal bb)
541: {
542:   PetscInt  ii,deg1;
543:   PetscReal y,zz,t0,t1,t2,*sc;

545:   PetscFunctionBegin;
546:   deg1 = m;
547:   if (aa==-1.0 && bb==1.0) zz = z0;  /* treat it as a special case to reduce rounding errors */
548:   else zz = (aa==bb)? 0.0 : -1.0+2.0*(z0-aa)/(bb-aa);

550:   /* compute y = P(z0), where we utilize the Chebyshev recursion */
551:   sc = pp+(idx-1)*m;   /* sc points to column idx of pp */
552:   y = *sc++;
553:   t1 = 1.0; t2 = zz;
554:   ii = deg1-1;
555:   while (ii--) {
556:     /* Chebyshev recursion: T_0(zz)=1, T_1(zz)=zz, and T_{i+1}(zz) = 2*zz*T_i(zz) + T_{i-1}(zz) for i>=2
557:        the values of T_{i+1}(zz), T_i(zz), T_{i-1}(zz) are stored in t0, t1, t2, respectively */
558:     t0 = 2*zz*t1 - t2;
559:     /* it also works for the base case / the first iteration, where t0 equals 2*zz*1-zz == zz which is T_1(zz) */
560:     t2 = t1;
561:     t1 = t0;
562:     y += (*sc++)*t0;
563:   }
564:   PetscFunctionReturn(y);
565: }

567: #define basisTranslated PETSC_TRUE
568: /*
569:    FILTLAN function PiecewisePolynomialEvaluationInChebyshevBasis

571:    evaluate P(z) at z=z0, where P(z) is a piecewise polynomial expanded
572:    in a basis of the (optionally translated, i.e. scale-and-shift) Chebyshev polynomials for each interval

574:    input:
575:    intv is a vector which defines the intervals; the j-th interval is [intv(j), intv(j+1))
576:    pp is a matrix of Chebyshev coefficients which defines a piecewise polynomial P(z)
577:    in a basis of the `translated' Chebyshev polynomials in each interval
578:    the polynomial P_j(z) in the j-th interval, i.e. when z is in [intv(j), intv(j+1)), is defined by the j-th column of pp:
579:        if basisTranslated == false, then
580:            P_j(z) = pp(1,j)*T_0(z) + pp(2,j)*T_1(z) + ... + pp(n,j)*T_{n-1}(z),
581:        where T_i(z) is the Chebyshev polynomial of the first kind,
582:            T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
583:        if basisTranslated == true, then
584:            P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
585:        where S_i is the `translated' Chebyshev polynomial S_i((z-c)/h) = T_i(z), with
586:            c = (intv(j)+intv(j+1)) / 2,  h = (intv(j+1)-intv(j)) / 2

588:    output:
589:    the evaluated value of P(z) at z=z0

591:    note that if z0 falls below the first interval, then the polynomial in the first interval will be used to evaluate P(z0)
592:              if z0 flies over  the last  interval, then the polynomial in the last  interval will be used to evaluate P(z0)
593: */
594: static inline PetscReal FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(PetscReal *pp,PetscInt m,PetscReal *intv,PetscInt npoints,PetscReal z0)
595: {
596:   PetscReal *sintv,aa,bb,resul;
597:   PetscInt  idx;

599:   PetscFunctionBegin;
600:   /* determine the interval which contains z0 */
601:   sintv = &intv[1];
602:   idx = 1;
603:   if (npoints>2 && z0 >= *sintv) {
604:     sintv++;
605:     while (++idx < npoints-1) {
606:       if (z0 < *sintv) break;
607:       sintv++;
608:     }
609:   }
610:   /* idx==1 if npoints<=2; otherwise idx satisfies:
611:          intv(idx) <= z0 < intv(idx+1),  if 2 <= idx <= npoints-2
612:          z0 < intv(idx+1),               if idx == 1
613:          intv(idx) <= z0,                if idx == npoints-1
614:      in addition, sintv points to &intv(idx+1) */
615:   if (basisTranslated) {
616:     /* the basis consists of `translated' Chebyshev polynomials */
617:     /* find the interval of concern, [aa,bb] */
618:     aa = *(sintv-1);
619:     bb = *sintv;
620:     resul = FILTLAN_PolynomialEvaluationInChebyshevBasis(pp,m,idx,z0,aa,bb);
621:   } else {
622:     /* the basis consists of standard Chebyshev polynomials, with interval [-1.0,1.0] for integration */
623:     resul = FILTLAN_PolynomialEvaluationInChebyshevBasis(pp,m,idx,z0,-1.0,1.0);
624:   }
625:   PetscFunctionReturn(resul);
626: }

628: /*
629:    FILTLAN function PiecewisePolynomialInnerProductInChebyshevBasis

631:    compute the weighted inner product of two piecewise polynomials expanded
632:    in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials for each interval

634:    pp and qq are two matrices of Chebyshev coefficients which define the piecewise polynomials P(z) and Q(z), respectively
635:    for z in the j-th interval, P(z) equals
636:        P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
637:    and Q(z) equals
638:        Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(n,j)*S_{n-1}(z),
639:    where S_i(z) is the `translated' Chebyshev polynomial in that interval,
640:        S_i((z-c)/h) = T_i(z),  c = (aa+bb)) / 2,  h = (bb-aa) / 2,
641:    with T_i(z) the Chebyshev polynomial of the first kind
642:        T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2

644:    the (scaled) j-th interval inner product is defined by
645:        <P_j,Q_j> = (Pi/2)*(pp(1,j)*qq(1,j) + sum_{k} pp(k,j)*qq(k,j)),
646:    which comes from the property
647:        <T_0,T_0>=pi, <T_i,T_i>=pi/2 for i>=1, and <T_i,T_j>=0 for i!=j

649:    the weighted inner product is <P,Q> = sum_{j} intervalWeights(j)*<P_j,Q_j>,
650:    which is the return value

652:    note that for unit weights, pass an empty vector of intervalWeights (i.e. of length 0)
653: */
654: static inline PetscReal FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(PetscReal *pp,PetscInt prows,PetscInt pcols,PetscInt ldp,PetscReal *qq,PetscInt qrows,PetscInt qcols,PetscInt ldq,const PetscReal *intervalWeights)
655: {
656:   PetscInt  nintv,deg1,i,k;
657:   PetscReal *sp,*sq,ans=0.0,ans2;

659:   PetscFunctionBegin;
660:   deg1 = PetscMin(prows,qrows);  /* number of effective coefficients, one more than the effective polynomial degree */
661:   if (PetscUnlikely(!deg1)) PetscFunctionReturn(0.0);
662:   nintv = PetscMin(pcols,qcols);  /* number of intervals */

664:   /* scaled by intervalWeights(i) in the i-th interval (we assume intervalWeights[] are always provided).
665:      compute ans = sum_{i=1,...,nintv} intervalWeights(i)*[ pp(1,i)*qq(1,i) + sum_{k=1,...,deg} pp(k,i)*qq(k,i) ] */
666:   for (i=0;i<nintv;i++) {   /* compute ans2 = pp(1,i)*qq(1,i) + sum_{k=1,...,deg} pp(k,i)*qq(k,i) */
667:     sp = pp+i*ldp;
668:     sq = qq+i*ldq;
669:     ans2 = (*sp) * (*sq);  /* the first term pp(1,i)*qq(1,i) is being added twice */
670:     for (k=0;k<deg1;k++) ans2 += (*sp++)*(*sq++);  /* add pp(k,i)*qq(k,i) */
671:     ans += ans2*intervalWeights[i];  /* compute ans += ans2*intervalWeights(i) */
672:   }
673:   PetscFunctionReturn(ans*PETSC_PI/2.0);
674: }

676: /*
677:    FILTLAN function PiecewisePolynomialInChebyshevBasisMultiplyX

679:    compute Q(z) = z*P(z), where P(z) and Q(z) are piecewise polynomials expanded
680:    in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials for each interval

682:    P(z) and Q(z) are stored as matrices of Chebyshev coefficients pp and qq, respectively

684:    For z in the j-th interval, P(z) equals
685:        P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
686:    and Q(z) equals
687:        Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(n,j)*S_{n-1}(z),
688:    where S_i(z) is the `translated' Chebyshev polynomial in that interval,
689:        S_i((z-c)/h) = T_i(z),  c = (intv(j)+intv(j+1))) / 2,  h = (intv(j+1)-intv(j)) / 2,
690:    with T_i(z) the Chebyshev polynomial of the first kind
691:        T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2

693:    the returned matrix is qq which represents Q(z) = z*P(z)
694: */
695: static inline PetscErrorCode FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(PetscReal *pp,PetscInt deg1,PetscInt ldp,PetscReal *intv,PetscInt nintv,PetscReal *qq,PetscInt ldq)
696: {
697:   PetscInt  i,j;
698:   PetscReal c,h,h2,tmp,*sp,*sq,*sp2,*sq2;

700:   PetscFunctionBegin;
701:   for (j=0;j<nintv;j++) {    /* consider interval between intv(j) and intv(j+1) */
702:     sp = pp+j*ldp;
703:     sq = qq+j*ldq;
704:     sp2 = sp;
705:     sq2 = sq+1;
706:     c = 0.5*(intv[j] + intv[j+1]);   /* compute c = (intv(j) + intv(j+1))/2 */
707:     h = 0.5*(intv[j+1] - intv[j]);   /* compute h = (intv(j+1) - intv(j))/2  and  h2 = h/2 */
708:     h2 = 0.5*h;
709:     i = deg1;
710:     while (i--) *sq++ = c*(*sp++);    /* compute q(1:deg1,j) = c*p(1:deg1,j) */
711:     *sq++ = 0.0;                      /* set q(deg1+1,j) = 0.0 */
712:     *(sq2++) += h*(*sp2++);           /* compute q(2,j) = q(2,j) + h*p(1,j) */
713:     i = deg1-1;
714:     while (i--) {       /* compute q(3:deg1+1,j) += h2*p(2:deg1,j) and then q(1:deg1-1,j) += h2*p(2:deg1,j) */
715:       tmp = h2*(*sp2++);
716:       *(sq2-2) += tmp;
717:       *(sq2++) += tmp;
718:     }
719:   }
720:   PetscFunctionReturn(PETSC_SUCCESS);
721: }

723: /* ////////////////////////////////////////////////////////////////////////////
724:    //    Conjugate Residual Method in the Polynomial Space
725:    //////////////////////////////////////////////////////////////////////////// */

727: /*
728:     B := alpha*A + beta*B

730:     A,B are nxk
731: */
732: static inline PetscErrorCode Mat_AXPY_BLAS(PetscInt n,PetscInt k,PetscReal alpha,const PetscReal *A,PetscInt lda,PetscReal beta,PetscReal *B,PetscInt ldb)
733: {
734:   PetscInt       i,j;

736:   PetscFunctionBegin;
737:   if (beta==(PetscReal)1.0) {
738:     for (j=0;j<k;j++) for (i=0;i<n;i++) B[i+j*ldb] += alpha*A[i+j*lda];
739:     PetscCall(PetscLogFlops(2.0*n*k));
740:   } else {
741:     for (j=0;j<k;j++) for (i=0;i<n;i++) B[i+j*ldb] = alpha*A[i+j*lda] + beta*B[i+j*ldb];
742:     PetscCall(PetscLogFlops(3.0*n*k));
743:   }
744:   PetscFunctionReturn(PETSC_SUCCESS);
745: }

747: /*
748:    FILTLAN function FilteredConjugateResidualPolynomial

750:    ** Conjugate Residual Method in the Polynomial Space

752:    this routine employs a conjugate-residual-type algorithm in polynomial space to minimize ||P(z)-Q(z)||_w,
753:    where P(z), the base filter, is the input piecewise polynomial, and
754:          Q(z) is the output polynomial satisfying Q(0)==1, i.e. the constant term of Q(z) is 1
755:    niter is the number of conjugate-residual iterations; therefore, the degree of Q(z) is up to niter+1
756:    both P(z) and Q(z) are expanded in the `translated' (scale-and-shift) Chebyshev basis for each interval,
757:    and presented as matrices of Chebyshev coefficients, denoted by pp and qq, respectively

759:    input:
760:    intv is a vector which defines the intervals; the j-th interval is [intv(j),intv(j+1))
761:    w is a vector of Chebyshev weights; the weight of j-th interval is w(j)
762:        the interval weights define the inner product of two continuous functions and then
763:        the derived w-norm ||P(z)-Q(z)||_w
764:    pp is a matrix of Chebyshev coefficients which defines the piecewise polynomial P(z)
765:    to be specific, for z in [intv(j), intv(j+1)), P(z) equals
766:        P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(niter+2,j)*S_{niter+1}(z),
767:    where S_i(z) is the `translated' Chebyshev polynomial in that interval,
768:        S_i((z-c)/h) = T_i(z),  c = (intv(j)+intv(j+1))) / 2,  h = (intv(j+1)-intv(j)) / 2,
769:    with T_i(z) the Chebyshev polynomial of the first kind,
770:        T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2

772:    output:
773:    the return matrix, denoted by qq, represents a polynomial Q(z) with degree up to 1+niter
774:    and satisfying Q(0)==1, such that ||P(z))-Q(z)||_w is minimized
775:    this polynomial Q(z) is expanded in the `translated' Chebyshev basis for each interval
776:    to be precise, considering z in [intv(j), intv(j+1)), Q(z) equals
777:        Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(niter+2,j)*S_{niter+1}(z)

779:    note:
780:    1. since Q(0)==1, P(0)==1 is expected; if P(0)!=1, one can translate P(z)
781:       for example, if P(0)==0, one can use 1-P(z) as input instead of P(z)
782:    2. typically, the base filter, defined by pp and intv, is from Hermite interpolation
783:       in intervals [intv(j),intv(j+1)) for j=1,...,nintv, with nintv the number of intervals
784: */
785: static PetscErrorCode FILTLAN_FilteredConjugateResidualPolynomial(PetscReal *cpol,PetscReal *baseFilter,PetscInt nbase,PetscReal *intv,PetscInt m,PetscReal *intervalWeights,PetscInt niter)
786: {
787:   PetscInt       i,j,srpol,scpol,sarpol,sppol,sappol,ld,nintv;
788:   PetscReal      rho,rho0,rho1,den,bet,alp,alp0,*ppol,*rpol,*appol,*arpol;

790:   PetscFunctionBegin;
791:   nintv = m-1;
792:   ld = niter+2;  /* leading dimension */
793:   PetscCall(PetscCalloc4(ld*nintv,&ppol,ld*nintv,&rpol,ld*nintv,&appol,ld*nintv,&arpol));
794:   PetscCall(PetscArrayzero(cpol,ld*nintv));
795:   /* initialize polynomial ppol to be 1 (i.e. multiplicative identity) in all intervals */
796:   sppol = 2;
797:   srpol = 2;
798:   scpol = 2;
799:   for (j=0;j<nintv;j++) {
800:     ppol[j*ld] = 1.0;
801:     rpol[j*ld] = 1.0;
802:     cpol[j*ld] = 1.0;
803:   }
804:   /* ppol is the initial p-polynomial (corresponding to the A-conjugate vector p in CG)
805:      rpol is the r-polynomial (corresponding to the residual vector r in CG)
806:      cpol is the "corrected" residual polynomial (result of this function) */
807:   sappol = 3;
808:   sarpol = 3;
809:   PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(ppol,sppol,ld,intv,nintv,appol,ld));
810:   for (i=0;i<3;i++) for (j=0;j<nintv;j++) arpol[i+j*ld] = appol[i+j*ld];
811:   rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
812:   for (i=0;i<niter;i++) {
813:     den = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(appol,sappol,nintv,ld,appol,sappol,nintv,ld,intervalWeights);
814:     alp0 = rho/den;
815:     rho1 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(baseFilter,nbase,nintv,nbase,appol,sappol,nintv,ld,intervalWeights);
816:     alp  = (rho-rho1)/den;
817:     srpol++;
818:     scpol++;
819:     PetscCall(Mat_AXPY_BLAS(srpol,nintv,-alp0,appol,ld,1.0,rpol,ld));
820:     PetscCall(Mat_AXPY_BLAS(scpol,nintv,-alp,appol,ld,1.0,cpol,ld));
821:     if (i+1 == niter) break;
822:     sarpol++;
823:     PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(rpol,srpol,ld,intv,nintv,arpol,ld));
824:     rho0 = rho;
825:     rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
826:     bet  = rho / rho0;
827:     sppol++;
828:     sappol++;
829:     PetscCall(Mat_AXPY_BLAS(sppol,nintv,1.0,rpol,ld,bet,ppol,ld));
830:     PetscCall(Mat_AXPY_BLAS(sappol,nintv,1.0,arpol,ld,bet,appol,ld));
831:   }
832:   PetscCall(PetscFree4(ppol,rpol,appol,arpol));
833:   PetscFunctionReturn(PETSC_SUCCESS);
834: }

836: /*
837:    FILTLAN function FilteredConjugateResidualMatrixPolynomialVectorProduct

839:    this routine employs a conjugate-residual-type algorithm in polynomial space to compute
840:    x = x0 + s(A)*r0 with r0 = b - A*x0, such that ||(1-P(z))-z*s(z)||_w is minimized, where
841:    P(z) is a given piecewise polynomial, called the base filter,
842:    s(z) is a polynomial of degree up to niter, the number of conjugate-residual iterations,
843:    and b and x0 are given vectors

845:    note that P(z) is expanded in the `translated' (scale-and-shift) Chebyshev basis for each interval,
846:    and presented as a matrix of Chebyshev coefficients pp

848:    input:
849:    A is a sparse matrix
850:    x0, b are vectors
851:    niter is the number of conjugate-residual iterations
852:    intv is a vector which defines the intervals; the j-th interval is [intv(j),intv(j+1))
853:    w is a vector of Chebyshev weights; the weight of j-th interval is w(j)
854:        the interval weights define the inner product of two continuous functions and then
855:        the derived w-norm ||P(z)-Q(z)||_w
856:    pp is a matrix of Chebyshev coefficients which defines the piecewise polynomial P(z)
857:    to be specific, for z in [intv(j), intv(j+1)), P(z) equals
858:        P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(niter+2,j)*S_{niter+1}(z),
859:    where S_i(z) is the `translated' Chebyshev polynomial in that interval,
860:        S_i((z-c)/h) = T_i(z),  c = (intv(j)+intv(j+1))) / 2,  h = (intv(j+1)-intv(j)) / 2,
861:    with T_i(z) the Chebyshev polynomial of the first kind,
862:        T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
863:    tol is the tolerance; if the residual polynomial in z-norm is dropped by a factor lower
864:        than tol, then stop the conjugate-residual iteration

866:    output:
867:    the return vector is x = x0 + s(A)*r0 with r0 = b - A*x0, such that ||(1-P(z))-z*s(z)||_w is minimized,
868:    subject to that s(z) is a polynomial of degree up to niter, where P(z) is the base filter
869:    in short, z*s(z) approximates 1-P(z)

871:    note:
872:    1. since z*s(z) approximates 1-P(z), P(0)==1 is expected; if P(0)!=1, one can translate P(z)
873:       for example, if P(0)==0, one can use 1-P(z) as input instead of P(z)
874:    2. typically, the base filter, defined by pp and intv, is from Hermite interpolation
875:       in intervals [intv(j),intv(j+1)) for j=1,...,nintv, with nintv the number of intervals
876:    3. a common application is to compute R(A)*b, where R(z) approximates 1-P(z)
877:       in this case, one can set x0 = 0 and then the return vector is x = s(A)*b, where
878:       z*s(z) approximates 1-P(z); therefore, A*x is the wanted R(A)*b
879: */
880: static PetscErrorCode FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProduct(Mat A,Vec b,Vec x,PetscReal *baseFilter,PetscInt nbase,PetscReal *intv,PetscInt nintv,PetscReal *intervalWeights,PetscInt niter,Vec *work)
881: {
882:   PetscInt       i,j,srpol,scpol,sarpol,sppol,sappol,ld;
883:   PetscReal      rho,rho0,rho00,rho1,den,bet,alp,alp0,*cpol,*ppol,*rpol,*appol,*arpol,tol=0.0;
884:   Vec            r=work[0],p=work[1],ap=work[2],w=work[3];
885:   PetscScalar    alpha;

887:   PetscFunctionBegin;
888:   ld = niter+3;  /* leading dimension */
889:   PetscCall(PetscCalloc5(ld*nintv,&ppol,ld*nintv,&rpol,ld*nintv,&cpol,ld*nintv,&appol,ld*nintv,&arpol));
890:   /* initialize polynomial ppol to be 1 (i.e. multiplicative identity) in all intervals */
891:   sppol = 2;
892:   srpol = 2;
893:   scpol = 2;
894:   for (j=0;j<nintv;j++) {
895:     ppol[j*ld] = 1.0;
896:     rpol[j*ld] = 1.0;
897:     cpol[j*ld] = 1.0;
898:   }
899:   /* ppol is the initial p-polynomial (corresponding to the A-conjugate vector p in CG)
900:      rpol is the r-polynomial (corresponding to the residual vector r in CG)
901:      cpol is the "corrected" residual polynomial */
902:   sappol = 3;
903:   sarpol = 3;
904:   PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(ppol,sppol,ld,intv,nintv,appol,ld));
905:   for (i=0;i<3;i++) for (j=0;j<nintv;j++) arpol[i+j*ld] = appol[i+j*ld];
906:   rho00 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
907:   rho = rho00;

909:   /* corrected CR in vector space */
910:   /* we assume x0 is always 0 */
911:   PetscCall(VecSet(x,0.0));
912:   PetscCall(VecCopy(b,r));     /* initial residual r = b-A*x0 */
913:   PetscCall(VecCopy(r,p));     /* p = r */
914:   PetscCall(MatMult(A,p,ap));  /* ap = A*p */

916:   for (i=0;i<niter;i++) {
917:     /* iteration in the polynomial space */
918:     den = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(appol,sappol,nintv,ld,appol,sappol,nintv,ld,intervalWeights);
919:     alp0 = rho/den;
920:     rho1 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(baseFilter,nbase,nintv,nbase,appol,sappol,nintv,ld,intervalWeights);
921:     alp  = (rho-rho1)/den;
922:     srpol++;
923:     scpol++;
924:     PetscCall(Mat_AXPY_BLAS(srpol,nintv,-alp0,appol,ld,1.0,rpol,ld));
925:     PetscCall(Mat_AXPY_BLAS(scpol,nintv,-alp,appol,ld,1.0,cpol,ld));
926:     sarpol++;
927:     PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(rpol,srpol,ld,intv,nintv,arpol,ld));
928:     rho0 = rho;
929:     rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);

931:     /* update x in the vector space */
932:     alpha = alp;
933:     PetscCall(VecAXPY(x,alpha,p));   /* x += alp*p */
934:     if (rho < tol*rho00) break;

936:     /* finish the iteration in the polynomial space */
937:     bet = rho / rho0;
938:     sppol++;
939:     sappol++;
940:     PetscCall(Mat_AXPY_BLAS(sppol,nintv,1.0,rpol,ld,bet,ppol,ld));
941:     PetscCall(Mat_AXPY_BLAS(sappol,nintv,1.0,arpol,ld,bet,appol,ld));

943:     /* finish the iteration in the vector space */
944:     alpha = -alp0;
945:     PetscCall(VecAXPY(r,alpha,ap));    /* r -= alp0*ap */
946:     alpha = bet;
947:     PetscCall(VecAYPX(p,alpha,r));     /* p = r + bet*p */
948:     PetscCall(MatMult(A,r,w));         /* ap = A*r + bet*ap */
949:     PetscCall(VecAYPX(ap,alpha,w));
950:   }
951:   PetscCall(PetscFree5(ppol,rpol,cpol,appol,arpol));
952:   PetscFunctionReturn(PETSC_SUCCESS);
953: }

955: /*
956:    Gateway to FILTLAN for evaluating y=p(A)*x
957: */
958: static PetscErrorCode MatMult_FILTLAN(Mat A,Vec x,Vec y)
959: {
960:   ST             st;
961:   ST_FILTER      *ctx;
962:   PetscInt       npoints;

964:   PetscFunctionBegin;
965:   PetscCall(MatShellGetContext(A,&st));
966:   ctx = (ST_FILTER*)st->data;
967:   npoints = (ctx->filterInfo->filterType == 2)? 6: 4;
968:   PetscCall(FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProduct(ctx->T,x,y,ctx->baseFilter,2*ctx->baseDegree+2,ctx->intervals,npoints-1,ctx->opts->intervalWeights,ctx->polyDegree,st->work));
969:   PetscCall(VecCopy(y,st->work[0]));
970:   PetscCall(MatMult(ctx->T,st->work[0],y));
971:   PetscFunctionReturn(PETSC_SUCCESS);
972: }

974: /* Block version of FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProduct */
975: static PetscErrorCode FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProductBlock(Mat A,Mat B,Mat C,PetscReal *baseFilter,PetscInt nbase,PetscReal *intv,PetscInt nintv,PetscReal *intervalWeights,PetscInt niter,Vec *work,Mat R,Mat P,Mat AP,Mat W)
976: {
977:   PetscInt       i,j,srpol,scpol,sarpol,sppol,sappol,ld;
978:   PetscReal      rho,rho0,rho00,rho1,den,bet,alp,alp0,*cpol,*ppol,*rpol,*appol,*arpol,tol=0.0;
979:   PetscScalar    alpha;

981:   PetscFunctionBegin;
982:   ld = niter+3;  /* leading dimension */
983:   PetscCall(PetscCalloc5(ld*nintv,&ppol,ld*nintv,&rpol,ld*nintv,&cpol,ld*nintv,&appol,ld*nintv,&arpol));
984:   /* initialize polynomial ppol to be 1 (i.e. multiplicative identity) in all intervals */
985:   sppol = 2;
986:   srpol = 2;
987:   scpol = 2;
988:   for (j=0;j<nintv;j++) {
989:     ppol[j*ld] = 1.0;
990:     rpol[j*ld] = 1.0;
991:     cpol[j*ld] = 1.0;
992:   }
993:   /* ppol is the initial p-polynomial (corresponding to the A-conjugate vector p in CG)
994:      rpol is the r-polynomial (corresponding to the residual vector r in CG)
995:      cpol is the "corrected" residual polynomial */
996:   sappol = 3;
997:   sarpol = 3;
998:   PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(ppol,sppol,ld,intv,nintv,appol,ld));
999:   for (i=0;i<3;i++) for (j=0;j<nintv;j++) arpol[i+j*ld] = appol[i+j*ld];
1000:   rho00 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
1001:   rho = rho00;

1003:   /* corrected CR in vector space */
1004:   /* we assume x0 is always 0 */
1005:   PetscCall(MatZeroEntries(C));
1006:   PetscCall(MatCopy(B,R,SAME_NONZERO_PATTERN));     /* initial residual r = b-A*x0 */
1007:   PetscCall(MatCopy(R,P,SAME_NONZERO_PATTERN));     /* p = r */
1008:   PetscCall(MatMatMult(A,P,MAT_REUSE_MATRIX,PETSC_DEFAULT,&AP));  /* ap = A*p */

1010:   for (i=0;i<niter;i++) {
1011:     /* iteration in the polynomial space */
1012:     den = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(appol,sappol,nintv,ld,appol,sappol,nintv,ld,intervalWeights);
1013:     alp0 = rho/den;
1014:     rho1 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(baseFilter,nbase,nintv,nbase,appol,sappol,nintv,ld,intervalWeights);
1015:     alp  = (rho-rho1)/den;
1016:     srpol++;
1017:     scpol++;
1018:     PetscCall(Mat_AXPY_BLAS(srpol,nintv,-alp0,appol,ld,1.0,rpol,ld));
1019:     PetscCall(Mat_AXPY_BLAS(scpol,nintv,-alp,appol,ld,1.0,cpol,ld));
1020:     sarpol++;
1021:     PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(rpol,srpol,ld,intv,nintv,arpol,ld));
1022:     rho0 = rho;
1023:     rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);

1025:     /* update x in the vector space */
1026:     alpha = alp;
1027:     PetscCall(MatAXPY(C,alpha,P,SAME_NONZERO_PATTERN));   /* x += alp*p */
1028:     if (rho < tol*rho00) break;

1030:     /* finish the iteration in the polynomial space */
1031:     bet = rho / rho0;
1032:     sppol++;
1033:     sappol++;
1034:     PetscCall(Mat_AXPY_BLAS(sppol,nintv,1.0,rpol,ld,bet,ppol,ld));
1035:     PetscCall(Mat_AXPY_BLAS(sappol,nintv,1.0,arpol,ld,bet,appol,ld));

1037:     /* finish the iteration in the vector space */
1038:     alpha = -alp0;
1039:     PetscCall(MatAXPY(R,alpha,AP,SAME_NONZERO_PATTERN));    /* r -= alp0*ap */
1040:     alpha = bet;
1041:     PetscCall(MatAYPX(P,alpha,R,SAME_NONZERO_PATTERN));     /* p = r + bet*p */
1042:     PetscCall(MatMatMult(A,R,MAT_REUSE_MATRIX,PETSC_DEFAULT,&W));         /* ap = A*r + bet*ap */
1043:     PetscCall(MatAYPX(AP,alpha,W,SAME_NONZERO_PATTERN));
1044:   }
1045:   PetscCall(PetscFree5(ppol,rpol,cpol,appol,arpol));
1046:   PetscFunctionReturn(PETSC_SUCCESS);
1047: }

1049: /*
1050:    Gateway to FILTLAN for evaluating C=p(A)*B
1051: */
1052: static PetscErrorCode MatMatMult_FILTLAN(Mat A,Mat B,Mat C,void *pctx)
1053: {
1054:   ST             st;
1055:   ST_FILTER      *ctx;
1056:   PetscInt       i,m1,m2,npoints;

1058:   PetscFunctionBegin;
1059:   PetscCall(MatShellGetContext(A,&st));
1060:   ctx = (ST_FILTER*)st->data;
1061:   npoints = (ctx->filterInfo->filterType == 2)? 6: 4;
1062:   if (ctx->nW) {  /* check if work matrices must be resized */
1063:     PetscCall(MatGetSize(B,NULL,&m1));
1064:     PetscCall(MatGetSize(ctx->W[0],NULL,&m2));
1065:     if (m1!=m2) {
1066:       PetscCall(MatDestroyMatrices(ctx->nW,&ctx->W));
1067:       ctx->nW = 0;
1068:     }
1069:   }
1070:   if (!ctx->nW) {  /* allocate work matrices */
1071:     ctx->nW = 4;
1072:     PetscCall(PetscMalloc1(ctx->nW,&ctx->W));
1073:     for (i=0;i<ctx->nW;i++) PetscCall(MatDuplicate(B,MAT_DO_NOT_COPY_VALUES,&ctx->W[i]));
1074:   }
1075:   PetscCall(FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProductBlock(ctx->T,B,ctx->W[0],ctx->baseFilter,2*ctx->baseDegree+2,ctx->intervals,npoints-1,ctx->opts->intervalWeights,ctx->polyDegree,st->work,C,ctx->W[1],ctx->W[2],ctx->W[3]));
1076:   PetscCall(MatMatMult(ctx->T,ctx->W[0],MAT_REUSE_MATRIX,PETSC_DEFAULT,&C));
1077:   PetscFunctionReturn(PETSC_SUCCESS);
1078: }

1080: /*
1081:    FILTLAN function PolynomialFilterInterface::setFilter

1083:    Creates the shifted (and scaled) matrix and the base filter P(z).
1084:    M is a shell matrix whose MatMult() applies the filter.
1085: */
1086: PetscErrorCode STFilter_FILTLAN_setFilter(ST st,Mat *G)
1087: {
1088:   ST_FILTER      *ctx = (ST_FILTER*)st->data;
1089:   PetscInt       i,npoints,n,m,N,M;
1090:   PetscReal      frame2[4];
1091:   PetscScalar    alpha;
1092:   const PetscInt HighLowFlags[5] = { 1, -1, 0, -1, 1 };

1094:   PetscFunctionBegin;
1095:   if (ctx->frame[0] == ctx->frame[1]) {  /* low pass filter, convert it to high pass filter */
1096:     /* T = frame[3]*eye(n) - A */
1097:     PetscCall(MatDestroy(&ctx->T));
1098:     PetscCall(MatDuplicate(st->A[0],MAT_COPY_VALUES,&ctx->T));
1099:     PetscCall(MatScale(ctx->T,-1.0));
1100:     alpha = ctx->frame[3];
1101:     PetscCall(MatShift(ctx->T,alpha));
1102:     for (i=0;i<4;i++) frame2[i] = ctx->frame[3] - ctx->frame[3-i];
1103:   } else {  /* it can be a mid-pass filter or a high-pass filter */
1104:     if (ctx->frame[0] == 0.0) {
1105:       PetscCall(PetscObjectReference((PetscObject)st->A[0]));
1106:       PetscCall(MatDestroy(&ctx->T));
1107:       ctx->T = st->A[0];
1108:       for (i=0;i<4;i++) frame2[i] = ctx->frame[i];
1109:     } else {
1110:       /* T = A - frame[0]*eye(n) */
1111:       PetscCall(MatDestroy(&ctx->T));
1112:       PetscCall(MatDuplicate(st->A[0],MAT_COPY_VALUES,&ctx->T));
1113:       alpha = -ctx->frame[0];
1114:       PetscCall(MatShift(ctx->T,alpha));
1115:       for (i=0;i<4;i++) frame2[i] = ctx->frame[i] - ctx->frame[0];
1116:     }
1117:   }

1119:   /* no need to recompute filter if the parameters did not change */
1120:   if (st->state==ST_STATE_INITIAL || ctx->filtch) {
1121:     PetscCall(FILTLAN_GetIntervals(ctx->intervals,frame2,ctx->polyDegree,ctx->baseDegree,ctx->opts,ctx->filterInfo));
1122:     /* translate the intervals back */
1123:     if (ctx->frame[0] == ctx->frame[1]) {  /* low pass filter, convert it to high pass filter */
1124:       for (i=0;i<4;i++) ctx->intervals2[i] = ctx->frame[3] - ctx->intervals[3-i];
1125:     } else {  /* it can be a mid-pass filter or a high-pass filter */
1126:       if (ctx->frame[0] == 0.0) {
1127:         for (i=0;i<6;i++) ctx->intervals2[i] = ctx->intervals[i];
1128:       } else {
1129:         for (i=0;i<6;i++) ctx->intervals2[i] = ctx->intervals[i] + ctx->frame[0];
1130:       }
1131:     }

1133:     npoints = (ctx->filterInfo->filterType == 2)? 6: 4;
1134:     PetscCall(PetscFree(ctx->baseFilter));
1135:     PetscCall(PetscMalloc1((2*ctx->baseDegree+2)*(npoints-1),&ctx->baseFilter));
1136:     PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(ctx->baseFilter,ctx->intervals,npoints,HighLowFlags,ctx->baseDegree));
1137:     PetscCall(PetscInfo(st,"Computed value of yLimit = %g\n",(double)ctx->filterInfo->yLimit));
1138:   }
1139:   ctx->filtch = PETSC_FALSE;

1141:   /* create shell matrix*/
1142:   if (!*G) {
1143:     PetscCall(MatGetSize(ctx->T,&N,&M));
1144:     PetscCall(MatGetLocalSize(ctx->T,&n,&m));
1145:     PetscCall(MatCreateShell(PetscObjectComm((PetscObject)st),n,m,N,M,st,G));
1146:     PetscCall(MatShellSetOperation(*G,MATOP_MULT,(void(*)(void))MatMult_FILTLAN));
1147:     PetscCall(MatShellSetMatProductOperation(*G,MATPRODUCT_AB,NULL,MatMatMult_FILTLAN,NULL,MATDENSE,MATDENSE));
1148:     PetscCall(MatShellSetMatProductOperation(*G,MATPRODUCT_AB,NULL,MatMatMult_FILTLAN,NULL,MATDENSECUDA,MATDENSECUDA));
1149:   }
1150:   PetscFunctionReturn(PETSC_SUCCESS);
1151: }