1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
10: /*
11: This file is an adaptation of several subroutines from FILTLAN, the
12: Filtered Lanczos Package, authored by Haw-ren Fang and Yousef Saad.
14: More information at:
15: https://www-users.cs.umn.edu/~saad/software/filtlan
17: References:
19: [1] H. Fang and Y. Saad, "A filtered Lanczos procedure for extreme and interior
20: eigenvalue problems", SIAM J. Sci. Comput. 34(4):A2220-A2246, 2012.
21: */
23: #include <slepc/private/stimpl.h> 24: #include "filter.h"
26: static PetscErrorCode FILTLAN_FilteredConjugateResidualPolynomial(PetscReal*,PetscReal*,PetscInt,PetscReal*,PetscInt,PetscReal*,PetscInt);
27: static PetscReal FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(PetscReal*,PetscInt,PetscReal*,PetscInt,PetscReal);
28: static PetscErrorCode FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(PetscInt,PetscReal,PetscReal,PetscReal*,PetscReal*,PetscReal*,PetscReal*);
30: /* ////////////////////////////////////////////////////////////////////////////
31: // Newton - Hermite Polynomial Interpolation
32: //////////////////////////////////////////////////////////////////////////// */
34: /*
35: FILTLAN function NewtonPolynomial
37: build P(z) by Newton's divided differences in the form
38: P(z) = a(1) + a(2)*(z-x(1)) + a(3)*(z-x(1))*(z-x(2)) + ... + a(n)*(z-x(1))*...*(z-x(n-1)),
39: such that P(x(i)) = y(i) for i=1,...,n, where
40: x,y are input vectors of length n, and a is the output vector of length n
41: if x(i)==x(j) for some i!=j, then it is assumed that the derivative of P(z) is to be zero at x(i),
42: and the Hermite polynomial interpolation is applied
43: in general, if there are k x(i)'s with the same value x0, then
44: the j-th order derivative of P(z) is zero at z=x0 for j=1,...,k-1
45: */
46: static inline PetscErrorCode FILTLAN_NewtonPolynomial(PetscInt n,PetscReal *x,PetscReal *y,PetscReal *sa,PetscReal *sf) 47: {
48: PetscReal d,*sx=x,*sy=y;
49: PetscInt j,k;
51: PetscFunctionBegin;
52: PetscCall(PetscArraycpy(sf,sy,n));
54: /* apply Newton's finite difference method */
55: sa[0] = sf[0];
56: for (j=1;j<n;j++) {
57: for (k=n-1;k>=j;k--) {
58: d = sx[k]-sx[k-j];
59: if (PetscUnlikely(d == 0.0)) sf[k] = 0.0; /* assume that the derivative is 0.0 and apply the Hermite interpolation */
60: else sf[k] = (sf[k]-sf[k-1]) / d;
61: }
62: sa[j] = sf[j];
63: }
64: PetscFunctionReturn(PETSC_SUCCESS);
65: }
67: /*
68: FILTLAN function HermiteBaseFilterInChebyshevBasis
70: compute a base filter P(z) which is a continuous, piecewise polynomial P(z) expanded
71: in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials in each interval
73: The base filter P(z) equals P_j(z) for z in the j-th interval [intv(j), intv(j+1)), where
74: P_j(z) a Hermite interpolating polynomial
76: input:
77: intv is a vector which defines the intervals; the j-th interval is [intv(j), intv(j+1))
78: HiLowFlags determines the shape of the base filter P(z)
79: Consider the j-th interval [intv(j), intv(j+1)]
80: HighLowFlag[j-1]==1, P(z)==1 for z in [intv(j), intv(j+1)]
81: ==0, P(z)==0 for z in [intv(j), intv(j+1)]
82: ==-1, [intv(j), intv(j+1)] is a transition interval;
83: P(intv(j)) and P(intv(j+1)) are defined such that P(z) is continuous
84: baseDeg is the degree of smoothness of the Hermite (piecewise polynomial) interpolation
85: to be precise, the i-th derivative of P(z) is zero, i.e. d^{i}P(z)/dz^i==0, at all interval
86: end points z=intv(j) for i=1,...,baseDeg
88: output:
89: P(z) expanded in a basis of `translated' (scale-and-shift) Chebyshev polynomials
90: to be precise, for z in the j-th interval [intv(j),intv(j+1)), P(z) equals
91: P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
92: where S_i(z) is the `translated' Chebyshev polynomial in that interval,
93: S_i((z-c)/h) = T_i(z), c = (intv(j)+intv(j+1))) / 2, h = (intv(j+1)-intv(j)) / 2,
94: with T_i(z) the Chebyshev polynomial of the first kind,
95: T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
96: the return matrix is the matrix of Chebyshev coefficients pp just described
98: note that the degree of P(z) in each interval is (at most) 2*baseDeg+1, with 2*baseDeg+2 coefficients
99: let n be the length of intv; then there are n-1 intervals
100: therefore the return matrix pp is of size (2*baseDeg+2)-by-(n-1)
101: */
102: static PetscErrorCode FILTLAN_HermiteBaseFilterInChebyshevBasis(PetscReal *baseFilter,PetscReal *intv,PetscInt npoints,const PetscInt *HighLowFlags,PetscInt baseDeg)103: {
104: PetscInt m,ii,jj;
105: PetscReal flag,flag0,flag2,aa,bb,*px,*py,*sx,*sy,*pp,*qq,*sq,*sbf,*work,*currentPoint = intv;
106: const PetscInt *hilo = HighLowFlags;
108: PetscFunctionBegin;
109: m = 2*baseDeg+2;
110: jj = npoints-1; /* jj is initialized as the number of intervals */
111: PetscCall(PetscMalloc5(m,&px,m,&py,m,&pp,m,&qq,m,&work));
112: sbf = baseFilter;
114: while (jj--) { /* the main loop to compute the Chebyshev coefficients */
116: flag = (PetscReal)(*hilo++); /* get the flag of the current interval */
117: if (flag == -1.0) { /* flag == -1 means that the current interval is a transition polynomial */
119: flag2 = (PetscReal)(*hilo); /* get flag2, the flag of the next interval */
120: flag0 = 1.0-flag2; /* the flag of the previous interval is 1-flag2 */
122: /* two pointers for traversing x[] and y[] */
123: sx = px;
124: sy = py;
126: /* find the current interval [aa,bb] */
127: aa = *currentPoint++;
128: bb = *currentPoint;
130: /* now left-hand side */
131: ii = baseDeg+1;
132: while (ii--) {
133: *sy++ = flag0;
134: *sx++ = aa;
135: }
137: /* now right-hand side */
138: ii = baseDeg+1;
139: while (ii--) {
140: *sy++ = flag2;
141: *sx++ = bb;
142: }
144: /* build a Newton polynomial (indeed, the generalized Hermite interpolating polynomial) with x[] and y[] */
145: PetscCall(FILTLAN_NewtonPolynomial(m,px,py,pp,work));
147: /* pp contains coefficients of the Newton polynomial P(z) in the current interval [aa,bb], where
148: P(z) = pp(1) + pp(2)*(z-px(1)) + pp(3)*(z-px(1))*(z-px(2)) + ... + pp(n)*(z-px(1))*...*(z-px(n-1)) */
150: /* translate the Newton coefficients to the Chebyshev coefficients */
151: PetscCall(FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(m,aa,bb,pp,px,qq,work));
152: /* qq contains coefficients of the polynomial in [aa,bb] in the `translated' Chebyshev basis */
154: /* copy the Chebyshev coefficients to baseFilter
155: OCTAVE/MATLAB: B(:,j) = qq, where j = (npoints-1)-jj and B is the return matrix */
156: sq = qq;
157: ii = 2*baseDeg+2;
158: while (ii--) *sbf++ = *sq++;
160: } else {
162: /* a constant polynomial P(z)=flag, where either flag==0 or flag==1
163: OCTAVE/MATLAB: B(1,j) = flag, where j = (npoints-1)-jj and B is the return matrix */
164: *sbf++ = flag;
166: /* the other coefficients are all zero, since P(z) is a constant
167: OCTAVE/MATLAB: B(1,j) = 0, where j = (npoints-1)-jj and B is the return matrix */
168: ii = 2*baseDeg+1;
169: while (ii--) *sbf++ = 0.0;
171: /* for the next point */
172: currentPoint++;
173: }
174: }
175: PetscCall(PetscFree5(px,py,pp,qq,work));
176: PetscFunctionReturn(PETSC_SUCCESS);
177: }
179: /* ////////////////////////////////////////////////////////////////////////////
180: // Base Filter
181: //////////////////////////////////////////////////////////////////////////// */
183: /*
184: FILTLAN function GetIntervals
186: this routine determines the intervals (including the transition one(s)) by an iterative process
188: frame is a vector consisting of 4 ordered elements:
189: [frame(1),frame(4)] is the interval which (tightly) contains all eigenvalues, and
190: [frame(2),frame(3)] is the interval in which the eigenvalues are sought
191: baseDeg is the left-and-right degree of the base filter for each interval
192: polyDeg is the (maximum possible) degree of s(z), with z*s(z) the polynomial filter
193: intv is the output; the j-th interval is [intv(j),intv(j+1))
194: opts is a collection of interval options
196: the base filter P(z) is a piecewise polynomial from Hermite interpolation with degree baseDeg
197: at each end point of intervals
199: the polynomial filter Q(z) is in the form z*s(z), i.e. Q(0)==0, such that ||(1-P(z))-z*s(z)||_w is
200: minimized with s(z) a polynomial of degree up to polyDeg
202: the resulting polynomial filter Q(z) satisfies Q(x)>=Q(y) for x in [frame[1],frame[2]], and
203: y in [frame[0],frame[3]] but not in [frame[1],frame[2]]
205: the routine fills a PolynomialFilterInfo struct which gives some information of the polynomial filter
206: */
207: static PetscErrorCode FILTLAN_GetIntervals(PetscReal *intervals,PetscReal *frame,PetscInt polyDeg,PetscInt baseDeg,FILTLAN_IOP opts,FILTLAN_PFI filterInfo)208: {
209: PetscReal intv[6],x,y,z1,z2,c,c1,c2,fc,fc2,halfPlateau,leftDelta,rightDelta,gridSize;
210: PetscReal yLimit,ySummit,yLeftLimit,yRightLimit,bottom,qIndex,*baseFilter,*polyFilter;
211: PetscReal yLimitGap=0.0,yLeftSummit=0.0,yLeftBottom=0.0,yRightSummit=0.0,yRightBottom=0.0;
212: PetscInt i,ii,npoints,numIter,numLeftSteps=1,numRightSteps=1,numMoreLooked=0;
213: PetscBool leftBoundaryMet=PETSC_FALSE,rightBoundaryMet=PETSC_FALSE,stepLeft,stepRight;
214: const PetscReal a=frame[0],a1=frame[1],b1=frame[2],b=frame[3];
215: const PetscInt HighLowFlags[5] = { 1, -1, 0, -1, 1 }; /* if filterType is 1, only first 3 elements will be used */
216: const PetscInt numLookMore = 2*(PetscInt)(0.5+(PetscLogReal(2.0)/PetscLogReal(opts->shiftStepExpanRate)));
218: PetscFunctionBegin;
219: PetscCheck(a<=a1 && a1<=b1 && b1<=b,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Values in the frame vector should be non-decreasing");
220: PetscCheck(a1!=b1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"The range of wanted eigenvalues cannot be of size zero");
221: filterInfo->filterType = 2; /* mid-pass filter, for interior eigenvalues */
222: if (b == b1) {
223: PetscCheck(a!=a1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"A polynomial filter should not cover all eigenvalues");
224: filterInfo->filterType = 1; /* high-pass filter, for largest eigenvalues */
225: } else PetscCheck(a!=a1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"filterType==3 for smallest eigenvalues should be pre-converted to filterType==1 for largest eigenvalues");
227: /* the following recipe follows Yousef Saad (2005, 2006) with a few minor adaptations / enhancements */
228: halfPlateau = 0.5*(b1-a1)*opts->initialPlateau; /* half length of the "plateau" of the (dual) base filter */
229: leftDelta = (b1-a1)*opts->initialShiftStep; /* initial left shift */
230: rightDelta = leftDelta; /* initial right shift */
231: opts->numGridPoints = PetscMax(opts->numGridPoints,(PetscInt)(2.0*(b-a)/halfPlateau));
232: gridSize = (b-a) / (PetscReal)opts->numGridPoints;
234: for (i=0;i<6;i++) intv[i] = 0.0;
235: if (filterInfo->filterType == 2) { /* for interior eigenvalues */
236: npoints = 6;
237: intv[0] = a;
238: intv[5] = b;
239: /* intv[1], intv[2], intv[3], and intv[4] to be determined */
240: } else { /* filterType == 1 (or 3 with conversion), for extreme eigenvalues */
241: npoints = 4;
242: intv[0] = a;
243: intv[3] = b;
244: /* intv[1], and intv[2] to be determined */
245: }
246: z1 = a1 - leftDelta;
247: z2 = b1 + rightDelta;
248: filterInfo->filterOK = 0; /* not yet found any OK filter */
250: /* allocate matrices */
251: PetscCall(PetscMalloc2((polyDeg+2)*(npoints-1),&polyFilter,(2*baseDeg+2)*(npoints-1),&baseFilter));
253: /* initialize the intervals, mainly for the case opts->maxOuterIter == 0 */
254: intervals[0] = intv[0];
255: intervals[3] = intv[3];
256: intervals[5] = intv[5];
257: intervals[1] = z1;
258: if (filterInfo->filterType == 2) { /* a mid-pass filter for interior eigenvalues */
259: intervals[4] = z2;
260: c = (a1+b1) / 2.0;
261: intervals[2] = c - halfPlateau;
262: intervals[3] = c + halfPlateau;
263: } else { /* filterType == 1 (or 3 with conversion) for extreme eigenvalues */
264: intervals[2] = z1 + (b1-z1)*opts->transIntervalRatio;
265: }
267: /* the main loop */
268: for (numIter=1;numIter<=opts->maxOuterIter;numIter++) {
269: if (z1 <= a) { /* outer loop updates z1 and z2 */
270: z1 = a;
271: leftBoundaryMet = PETSC_TRUE;
272: }
273: if (filterInfo->filterType == 2) { /* a <= z1 < (a1) */
274: if (z2 >= b) { /* a mid-pass filter for interior eigenvalues */
275: z2 = b;
276: rightBoundaryMet = PETSC_TRUE;
277: }
278: /* a <= z1 < c-h < c+h < z2 <= b, where h is halfPlateau */
279: /* [z1, c-h] and [c+h, z2] are transition interval */
280: intv[1] = z1;
281: intv[4] = z2;
282: c1 = z1 + halfPlateau;
283: intv[2] = z1; /* i.e. c1 - halfPlateau */
284: intv[3] = c1 + halfPlateau;
285: PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg));
286: PetscCall(FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg));
287: /* fc1 = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1); */
288: c2 = z2 - halfPlateau;
289: intv[2] = c2 - halfPlateau;
290: intv[3] = z2; /* i.e. c2 + halfPlateau */
291: PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg));
292: PetscCall(FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg));
293: fc2 = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1);
294: yLimitGap = PETSC_MAX_REAL;
295: ii = opts->maxInnerIter;
296: while (ii-- && !(yLimitGap <= opts->yLimitTol)) {
297: /* recursive bisection to get c such that p(a1) are p(b1) approximately the same */
298: c = (c1+c2) / 2.0;
299: intv[2] = c - halfPlateau;
300: intv[3] = c + halfPlateau;
301: PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg));
302: PetscCall(FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg));
303: fc = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1);
304: if (fc*fc2 < 0.0) {
305: c1 = c;
306: /* fc1 = fc; */
307: } else {
308: c2 = c;
309: fc2 = fc;
310: }
311: yLimitGap = PetscAbsReal(fc);
312: }
313: } else { /* filterType == 1 (or 3 with conversion) for extreme eigenvalues */
314: intv[1] = z1;
315: intv[2] = z1 + (b1-z1)*opts->transIntervalRatio;
316: PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,4,HighLowFlags,baseDeg));
317: PetscCall(FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,4,opts->intervalWeights,polyDeg));
318: }
319: /* polyFilter contains the coefficients of the polynomial filter which approximates phi(x)
320: expanded in the `translated' Chebyshev basis */
321: /* psi(x) = 1.0 - phi(x) is the dual base filter approximated by a polynomial in the form x*p(x) */
322: yLeftLimit = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1);
323: yRightLimit = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1);
324: yLimit = (yLeftLimit < yRightLimit) ? yLeftLimit : yRightLimit;
325: ySummit = (yLeftLimit > yRightLimit) ? yLeftLimit : yRightLimit;
326: x = a1;
327: while ((x+=gridSize) < b1) {
328: y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
329: if (y < yLimit) yLimit = y;
330: if (y > ySummit) ySummit = y;
331: }
332: /* now yLimit is the minimum of x*p(x) for x in [a1, b1] */
333: stepLeft = PETSC_FALSE;
334: stepRight = PETSC_FALSE;
335: if ((yLimit < yLeftLimit && yLimit < yRightLimit) || yLimit < opts->yBottomLine) {
336: /* very bad, step to see what will happen */
337: stepLeft = PETSC_TRUE;
338: if (filterInfo->filterType == 2) stepRight = PETSC_TRUE;
339: } else if (filterInfo->filterType == 2) {
340: if (yLeftLimit < yRightLimit) {
341: if (yRightLimit-yLeftLimit > opts->yLimitTol) stepLeft = PETSC_TRUE;
342: } else if (yLeftLimit-yRightLimit > opts->yLimitTol) stepRight = PETSC_TRUE;
343: }
344: if (!stepLeft) {
345: yLeftBottom = yLeftLimit;
346: x = a1;
347: while ((x-=gridSize) >= a) {
348: y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
349: if (y < yLeftBottom) yLeftBottom = y;
350: else if (y > yLeftBottom) break;
351: }
352: yLeftSummit = yLeftBottom;
353: while ((x-=gridSize) >= a) {
354: y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
355: if (y > yLeftSummit) {
356: yLeftSummit = y;
357: if (yLeftSummit > yLimit*opts->yRippleLimit) {
358: stepLeft = PETSC_TRUE;
359: break;
360: }
361: }
362: if (y < yLeftBottom) yLeftBottom = y;
363: }
364: }
365: if (filterInfo->filterType == 2 && !stepRight) {
366: yRightBottom = yRightLimit;
367: x = b1;
368: while ((x+=gridSize) <= b) {
369: y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
370: if (y < yRightBottom) yRightBottom = y;
371: else if (y > yRightBottom) break;
372: }
373: yRightSummit = yRightBottom;
374: while ((x+=gridSize) <= b) {
375: y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
376: if (y > yRightSummit) {
377: yRightSummit = y;
378: if (yRightSummit > yLimit*opts->yRippleLimit) {
379: stepRight = PETSC_TRUE;
380: break;
381: }
382: }
383: if (y < yRightBottom) yRightBottom = y;
384: }
385: }
386: if (!stepLeft && !stepRight) {
387: if (filterInfo->filterType == 2) bottom = PetscMin(yLeftBottom,yRightBottom);
388: else bottom = yLeftBottom;
389: qIndex = 1.0 - (yLimit-bottom) / (ySummit-bottom);
390: if (filterInfo->filterOK == 0 || filterInfo->filterQualityIndex < qIndex) {
391: /* found the first OK filter or a better filter */
392: for (i=0;i<6;i++) intervals[i] = intv[i];
393: filterInfo->filterOK = 1;
394: filterInfo->filterQualityIndex = qIndex;
395: filterInfo->numIter = numIter;
396: filterInfo->yLimit = yLimit;
397: filterInfo->ySummit = ySummit;
398: filterInfo->numLeftSteps = numLeftSteps;
399: filterInfo->yLeftSummit = yLeftSummit;
400: filterInfo->yLeftBottom = yLeftBottom;
401: if (filterInfo->filterType == 2) {
402: filterInfo->yLimitGap = yLimitGap;
403: filterInfo->numRightSteps = numRightSteps;
404: filterInfo->yRightSummit = yRightSummit;
405: filterInfo->yRightBottom = yRightBottom;
406: }
407: numMoreLooked = 0;
408: } else if (++numMoreLooked == numLookMore) {
409: /* filter has been optimal */
410: filterInfo->filterOK = 2;
411: break;
412: }
413: /* try stepping further to see whether it can improve */
414: stepLeft = PETSC_TRUE;
415: if (filterInfo->filterType == 2) stepRight = PETSC_TRUE;
416: }
417: /* check whether we can really "step" */
418: if (leftBoundaryMet) {
419: if (filterInfo->filterType == 1 || rightBoundaryMet) break; /* cannot step further, so break the loop */
420: if (stepLeft) {
421: /* cannot step left, so try stepping right */
422: stepLeft = PETSC_FALSE;
423: stepRight = PETSC_TRUE;
424: }
425: }
426: if (rightBoundaryMet && stepRight) {
427: /* cannot step right, so try stepping left */
428: stepRight = PETSC_FALSE;
429: stepLeft = PETSC_TRUE;
430: }
431: /* now "step" */
432: if (stepLeft) {
433: numLeftSteps++;
434: if (filterInfo->filterType == 2) leftDelta *= opts->shiftStepExpanRate; /* expand the step for faster convergence */
435: z1 -= leftDelta;
436: }
437: if (stepRight) {
438: numRightSteps++;
439: rightDelta *= opts->shiftStepExpanRate; /* expand the step for faster convergence */
440: z2 += rightDelta;
441: }
442: if (filterInfo->filterType == 2) {
443: /* shrink the "plateau" of the (dual) base filter */
444: if (stepLeft && stepRight) halfPlateau /= opts->plateauShrinkRate;
445: else halfPlateau /= PetscSqrtReal(opts->plateauShrinkRate);
446: }
447: }
448: PetscCheck(filterInfo->filterOK,PETSC_COMM_SELF,PETSC_ERR_CONV_FAILED,"STFILTER cannot get the filter specified; please adjust your filter parameters (e.g. increasing the polynomial degree)");
450: filterInfo->totalNumIter = numIter;
451: PetscCall(PetscFree2(polyFilter,baseFilter));
452: PetscFunctionReturn(PETSC_SUCCESS);
453: }
455: /* ////////////////////////////////////////////////////////////////////////////
456: // Chebyshev Polynomials
457: //////////////////////////////////////////////////////////////////////////// */
459: /*
460: FILTLAN function ExpandNewtonPolynomialInChebyshevBasis
462: translate the coefficients of a Newton polynomial to the coefficients
463: in a basis of the `translated' (scale-and-shift) Chebyshev polynomials
465: input:
466: a Newton polynomial defined by vectors a and x:
467: P(z) = a(1) + a(2)*(z-x(1)) + a(3)*(z-x(1))*(z-x(2)) + ... + a(n)*(z-x(1))*...*(z-x(n-1))
468: the interval [aa,bb] defines the `translated' Chebyshev polynomials S_i(z) = T_i((z-c)/h),
469: where c=(aa+bb)/2 and h=(bb-aa)/2, and T_i is the Chebyshev polynomial of the first kind
470: note that T_i is defined by T_0(z)=1, T_1(z)=z, and T_i(z)=2*z*T_{i-1}(z)+T_{i-2}(z) for i>=2
472: output:
473: a vector q containing the Chebyshev coefficients:
474: P(z) = q(1)*S_0(z) + q(2)*S_1(z) + ... + q(n)*S_{n-1}(z)
475: */
476: static inline PetscErrorCode FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(PetscInt n,PetscReal aa,PetscReal bb,PetscReal *a,PetscReal *x,PetscReal *q,PetscReal *q2)477: {
478: PetscInt m,mm;
479: PetscReal *sa,*sx,*sq,*sq2,c,c2,h,h2;
481: PetscFunctionBegin;
482: sa = a+n; /* pointers for traversing a and x */
483: sx = x+n-1;
484: *q = *--sa; /* set q[0] = a(n) */
486: c = (aa+bb)/2.0;
487: h = (bb-aa)/2.0;
488: h2 = h/2.0;
490: for (m=1;m<=n-1;m++) { /* the main loop for translation */
492: /* compute q2[0:m-1] = (c-x[n-m-1])*q[0:m-1] */
493: mm = m;
494: sq = q;
495: sq2 = q2;
496: c2 = c-(*--sx);
497: while (mm--) *(sq2++) = c2*(*sq++);
498: *sq2 = 0.0; /* q2[m] = 0.0 */
499: *(q2+1) += h*(*q); /* q2[1] = q2[1] + h*q[0] */
501: /* compute q2[0:m-2] = q2[0:m-2] + h2*q[1:m-1] */
502: mm = m-1;
503: sq2 = q2;
504: sq = q+1;
505: while (mm--) *(sq2++) += h2*(*sq++);
507: /* compute q2[2:m] = q2[2:m] + h2*q[1:m-1] */
508: mm = m-1;
509: sq2 = q2+2;
510: sq = q+1;
511: while (mm--) *(sq2++) += h2*(*sq++);
513: /* compute q[0:m] = q2[0:m] */
514: mm = m+1;
515: sq2 = q2;
516: sq = q;
517: while (mm--) *sq++ = *sq2++;
518: *q += (*--sa); /* q[0] = q[0] + p[n-m-1] */
519: }
520: PetscFunctionReturn(PETSC_SUCCESS);
521: }
523: /*
524: FILTLAN function PolynomialEvaluationInChebyshevBasis
526: evaluate P(z) at z=z0, where P(z) is a polynomial expanded in a basis of
527: the `translated' (i.e. scale-and-shift) Chebyshev polynomials
529: input:
530: c is a vector of Chebyshev coefficients which defines the polynomial
531: P(z) = c(1)*S_0(z) + c(2)*S_1(z) + ... + c(n)*S_{n-1}(z),
532: where S_i is the `translated' Chebyshev polynomial S_i((z-c)/h) = T_i(z), with
533: c = (intv(j)+intv(j+1)) / 2, h = (intv(j+1)-intv(j)) / 2
534: note that T_i(z) is the Chebyshev polynomial of the first kind,
535: T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
537: output:
538: the evaluated value of P(z) at z=z0
539: */
540: static inline PetscReal FILTLAN_PolynomialEvaluationInChebyshevBasis(PetscReal *pp,PetscInt m,PetscInt idx,PetscReal z0,PetscReal aa,PetscReal bb)541: {
542: PetscInt ii,deg1;
543: PetscReal y,zz,t0,t1,t2,*sc;
545: PetscFunctionBegin;
546: deg1 = m;
547: if (aa==-1.0 && bb==1.0) zz = z0; /* treat it as a special case to reduce rounding errors */
548: else zz = (aa==bb)? 0.0 : -1.0+2.0*(z0-aa)/(bb-aa);
550: /* compute y = P(z0), where we utilize the Chebyshev recursion */
551: sc = pp+(idx-1)*m; /* sc points to column idx of pp */
552: y = *sc++;
553: t1 = 1.0; t2 = zz;
554: ii = deg1-1;
555: while (ii--) {
556: /* Chebyshev recursion: T_0(zz)=1, T_1(zz)=zz, and T_{i+1}(zz) = 2*zz*T_i(zz) + T_{i-1}(zz) for i>=2
557: the values of T_{i+1}(zz), T_i(zz), T_{i-1}(zz) are stored in t0, t1, t2, respectively */
558: t0 = 2*zz*t1 - t2;
559: /* it also works for the base case / the first iteration, where t0 equals 2*zz*1-zz == zz which is T_1(zz) */
560: t2 = t1;
561: t1 = t0;
562: y += (*sc++)*t0;
563: }
564: PetscFunctionReturn(y);
565: }
567: #define basisTranslated PETSC_TRUE568: /*
569: FILTLAN function PiecewisePolynomialEvaluationInChebyshevBasis
571: evaluate P(z) at z=z0, where P(z) is a piecewise polynomial expanded
572: in a basis of the (optionally translated, i.e. scale-and-shift) Chebyshev polynomials for each interval
574: input:
575: intv is a vector which defines the intervals; the j-th interval is [intv(j), intv(j+1))
576: pp is a matrix of Chebyshev coefficients which defines a piecewise polynomial P(z)
577: in a basis of the `translated' Chebyshev polynomials in each interval
578: the polynomial P_j(z) in the j-th interval, i.e. when z is in [intv(j), intv(j+1)), is defined by the j-th column of pp:
579: if basisTranslated == false, then
580: P_j(z) = pp(1,j)*T_0(z) + pp(2,j)*T_1(z) + ... + pp(n,j)*T_{n-1}(z),
581: where T_i(z) is the Chebyshev polynomial of the first kind,
582: T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
583: if basisTranslated == true, then
584: P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
585: where S_i is the `translated' Chebyshev polynomial S_i((z-c)/h) = T_i(z), with
586: c = (intv(j)+intv(j+1)) / 2, h = (intv(j+1)-intv(j)) / 2
588: output:
589: the evaluated value of P(z) at z=z0
591: note that if z0 falls below the first interval, then the polynomial in the first interval will be used to evaluate P(z0)
592: if z0 flies over the last interval, then the polynomial in the last interval will be used to evaluate P(z0)
593: */
594: static inline PetscReal FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(PetscReal *pp,PetscInt m,PetscReal *intv,PetscInt npoints,PetscReal z0)595: {
596: PetscReal *sintv,aa,bb,resul;
597: PetscInt idx;
599: PetscFunctionBegin;
600: /* determine the interval which contains z0 */
601: sintv = &intv[1];
602: idx = 1;
603: if (npoints>2 && z0 >= *sintv) {
604: sintv++;
605: while (++idx < npoints-1) {
606: if (z0 < *sintv) break;
607: sintv++;
608: }
609: }
610: /* idx==1 if npoints<=2; otherwise idx satisfies:
611: intv(idx) <= z0 < intv(idx+1), if 2 <= idx <= npoints-2
612: z0 < intv(idx+1), if idx == 1
613: intv(idx) <= z0, if idx == npoints-1
614: in addition, sintv points to &intv(idx+1) */
615: if (basisTranslated) {
616: /* the basis consists of `translated' Chebyshev polynomials */
617: /* find the interval of concern, [aa,bb] */
618: aa = *(sintv-1);
619: bb = *sintv;
620: resul = FILTLAN_PolynomialEvaluationInChebyshevBasis(pp,m,idx,z0,aa,bb);
621: } else {
622: /* the basis consists of standard Chebyshev polynomials, with interval [-1.0,1.0] for integration */
623: resul = FILTLAN_PolynomialEvaluationInChebyshevBasis(pp,m,idx,z0,-1.0,1.0);
624: }
625: PetscFunctionReturn(resul);
626: }
628: /*
629: FILTLAN function PiecewisePolynomialInnerProductInChebyshevBasis
631: compute the weighted inner product of two piecewise polynomials expanded
632: in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials for each interval
634: pp and qq are two matrices of Chebyshev coefficients which define the piecewise polynomials P(z) and Q(z), respectively
635: for z in the j-th interval, P(z) equals
636: P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
637: and Q(z) equals
638: Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(n,j)*S_{n-1}(z),
639: where S_i(z) is the `translated' Chebyshev polynomial in that interval,
640: S_i((z-c)/h) = T_i(z), c = (aa+bb)) / 2, h = (bb-aa) / 2,
641: with T_i(z) the Chebyshev polynomial of the first kind
642: T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
644: the (scaled) j-th interval inner product is defined by
645: <P_j,Q_j> = (Pi/2)*(pp(1,j)*qq(1,j) + sum_{k} pp(k,j)*qq(k,j)),
646: which comes from the property
647: <T_0,T_0>=pi, <T_i,T_i>=pi/2 for i>=1, and <T_i,T_j>=0 for i!=j
649: the weighted inner product is <P,Q> = sum_{j} intervalWeights(j)*<P_j,Q_j>,
650: which is the return value
652: note that for unit weights, pass an empty vector of intervalWeights (i.e. of length 0)
653: */
654: static inline PetscReal FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(PetscReal *pp,PetscInt prows,PetscInt pcols,PetscInt ldp,PetscReal *qq,PetscInt qrows,PetscInt qcols,PetscInt ldq,const PetscReal *intervalWeights)655: {
656: PetscInt nintv,deg1,i,k;
657: PetscReal *sp,*sq,ans=0.0,ans2;
659: PetscFunctionBegin;
660: deg1 = PetscMin(prows,qrows); /* number of effective coefficients, one more than the effective polynomial degree */
661: if (PetscUnlikely(!deg1)) PetscFunctionReturn(0.0);
662: nintv = PetscMin(pcols,qcols); /* number of intervals */
664: /* scaled by intervalWeights(i) in the i-th interval (we assume intervalWeights[] are always provided).
665: compute ans = sum_{i=1,...,nintv} intervalWeights(i)*[ pp(1,i)*qq(1,i) + sum_{k=1,...,deg} pp(k,i)*qq(k,i) ] */
666: for (i=0;i<nintv;i++) { /* compute ans2 = pp(1,i)*qq(1,i) + sum_{k=1,...,deg} pp(k,i)*qq(k,i) */
667: sp = pp+i*ldp;
668: sq = qq+i*ldq;
669: ans2 = (*sp) * (*sq); /* the first term pp(1,i)*qq(1,i) is being added twice */
670: for (k=0;k<deg1;k++) ans2 += (*sp++)*(*sq++); /* add pp(k,i)*qq(k,i) */
671: ans += ans2*intervalWeights[i]; /* compute ans += ans2*intervalWeights(i) */
672: }
673: PetscFunctionReturn(ans*PETSC_PI/2.0);
674: }
676: /*
677: FILTLAN function PiecewisePolynomialInChebyshevBasisMultiplyX
679: compute Q(z) = z*P(z), where P(z) and Q(z) are piecewise polynomials expanded
680: in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials for each interval
682: P(z) and Q(z) are stored as matrices of Chebyshev coefficients pp and qq, respectively
684: For z in the j-th interval, P(z) equals
685: P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
686: and Q(z) equals
687: Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(n,j)*S_{n-1}(z),
688: where S_i(z) is the `translated' Chebyshev polynomial in that interval,
689: S_i((z-c)/h) = T_i(z), c = (intv(j)+intv(j+1))) / 2, h = (intv(j+1)-intv(j)) / 2,
690: with T_i(z) the Chebyshev polynomial of the first kind
691: T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
693: the returned matrix is qq which represents Q(z) = z*P(z)
694: */
695: static inline PetscErrorCode FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(PetscReal *pp,PetscInt deg1,PetscInt ldp,PetscReal *intv,PetscInt nintv,PetscReal *qq,PetscInt ldq)696: {
697: PetscInt i,j;
698: PetscReal c,h,h2,tmp,*sp,*sq,*sp2,*sq2;
700: PetscFunctionBegin;
701: for (j=0;j<nintv;j++) { /* consider interval between intv(j) and intv(j+1) */
702: sp = pp+j*ldp;
703: sq = qq+j*ldq;
704: sp2 = sp;
705: sq2 = sq+1;
706: c = 0.5*(intv[j] + intv[j+1]); /* compute c = (intv(j) + intv(j+1))/2 */
707: h = 0.5*(intv[j+1] - intv[j]); /* compute h = (intv(j+1) - intv(j))/2 and h2 = h/2 */
708: h2 = 0.5*h;
709: i = deg1;
710: while (i--) *sq++ = c*(*sp++); /* compute q(1:deg1,j) = c*p(1:deg1,j) */
711: *sq++ = 0.0; /* set q(deg1+1,j) = 0.0 */
712: *(sq2++) += h*(*sp2++); /* compute q(2,j) = q(2,j) + h*p(1,j) */
713: i = deg1-1;
714: while (i--) { /* compute q(3:deg1+1,j) += h2*p(2:deg1,j) and then q(1:deg1-1,j) += h2*p(2:deg1,j) */
715: tmp = h2*(*sp2++);
716: *(sq2-2) += tmp;
717: *(sq2++) += tmp;
718: }
719: }
720: PetscFunctionReturn(PETSC_SUCCESS);
721: }
723: /* ////////////////////////////////////////////////////////////////////////////
724: // Conjugate Residual Method in the Polynomial Space
725: //////////////////////////////////////////////////////////////////////////// */
727: /*
728: B := alpha*A + beta*B
730: A,B are nxk
731: */
732: static inline PetscErrorCode Mat_AXPY_BLAS(PetscInt n,PetscInt k,PetscReal alpha,const PetscReal *A,PetscInt lda,PetscReal beta,PetscReal *B,PetscInt ldb)733: {
734: PetscInt i,j;
736: PetscFunctionBegin;
737: if (beta==(PetscReal)1.0) {
738: for (j=0;j<k;j++) for (i=0;i<n;i++) B[i+j*ldb] += alpha*A[i+j*lda];
739: PetscCall(PetscLogFlops(2.0*n*k));
740: } else {
741: for (j=0;j<k;j++) for (i=0;i<n;i++) B[i+j*ldb] = alpha*A[i+j*lda] + beta*B[i+j*ldb];
742: PetscCall(PetscLogFlops(3.0*n*k));
743: }
744: PetscFunctionReturn(PETSC_SUCCESS);
745: }
747: /*
748: FILTLAN function FilteredConjugateResidualPolynomial
750: ** Conjugate Residual Method in the Polynomial Space
752: this routine employs a conjugate-residual-type algorithm in polynomial space to minimize ||P(z)-Q(z)||_w,
753: where P(z), the base filter, is the input piecewise polynomial, and
754: Q(z) is the output polynomial satisfying Q(0)==1, i.e. the constant term of Q(z) is 1
755: niter is the number of conjugate-residual iterations; therefore, the degree of Q(z) is up to niter+1
756: both P(z) and Q(z) are expanded in the `translated' (scale-and-shift) Chebyshev basis for each interval,
757: and presented as matrices of Chebyshev coefficients, denoted by pp and qq, respectively
759: input:
760: intv is a vector which defines the intervals; the j-th interval is [intv(j),intv(j+1))
761: w is a vector of Chebyshev weights; the weight of j-th interval is w(j)
762: the interval weights define the inner product of two continuous functions and then
763: the derived w-norm ||P(z)-Q(z)||_w
764: pp is a matrix of Chebyshev coefficients which defines the piecewise polynomial P(z)
765: to be specific, for z in [intv(j), intv(j+1)), P(z) equals
766: P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(niter+2,j)*S_{niter+1}(z),
767: where S_i(z) is the `translated' Chebyshev polynomial in that interval,
768: S_i((z-c)/h) = T_i(z), c = (intv(j)+intv(j+1))) / 2, h = (intv(j+1)-intv(j)) / 2,
769: with T_i(z) the Chebyshev polynomial of the first kind,
770: T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
772: output:
773: the return matrix, denoted by qq, represents a polynomial Q(z) with degree up to 1+niter
774: and satisfying Q(0)==1, such that ||P(z))-Q(z)||_w is minimized
775: this polynomial Q(z) is expanded in the `translated' Chebyshev basis for each interval
776: to be precise, considering z in [intv(j), intv(j+1)), Q(z) equals
777: Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(niter+2,j)*S_{niter+1}(z)
779: note:
780: 1. since Q(0)==1, P(0)==1 is expected; if P(0)!=1, one can translate P(z)
781: for example, if P(0)==0, one can use 1-P(z) as input instead of P(z)
782: 2. typically, the base filter, defined by pp and intv, is from Hermite interpolation
783: in intervals [intv(j),intv(j+1)) for j=1,...,nintv, with nintv the number of intervals
784: */
785: static PetscErrorCode FILTLAN_FilteredConjugateResidualPolynomial(PetscReal *cpol,PetscReal *baseFilter,PetscInt nbase,PetscReal *intv,PetscInt m,PetscReal *intervalWeights,PetscInt niter)786: {
787: PetscInt i,j,srpol,scpol,sarpol,sppol,sappol,ld,nintv;
788: PetscReal rho,rho0,rho1,den,bet,alp,alp0,*ppol,*rpol,*appol,*arpol;
790: PetscFunctionBegin;
791: nintv = m-1;
792: ld = niter+2; /* leading dimension */
793: PetscCall(PetscCalloc4(ld*nintv,&ppol,ld*nintv,&rpol,ld*nintv,&appol,ld*nintv,&arpol));
794: PetscCall(PetscArrayzero(cpol,ld*nintv));
795: /* initialize polynomial ppol to be 1 (i.e. multiplicative identity) in all intervals */
796: sppol = 2;
797: srpol = 2;
798: scpol = 2;
799: for (j=0;j<nintv;j++) {
800: ppol[j*ld] = 1.0;
801: rpol[j*ld] = 1.0;
802: cpol[j*ld] = 1.0;
803: }
804: /* ppol is the initial p-polynomial (corresponding to the A-conjugate vector p in CG)
805: rpol is the r-polynomial (corresponding to the residual vector r in CG)
806: cpol is the "corrected" residual polynomial (result of this function) */
807: sappol = 3;
808: sarpol = 3;
809: PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(ppol,sppol,ld,intv,nintv,appol,ld));
810: for (i=0;i<3;i++) for (j=0;j<nintv;j++) arpol[i+j*ld] = appol[i+j*ld];
811: rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
812: for (i=0;i<niter;i++) {
813: den = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(appol,sappol,nintv,ld,appol,sappol,nintv,ld,intervalWeights);
814: alp0 = rho/den;
815: rho1 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(baseFilter,nbase,nintv,nbase,appol,sappol,nintv,ld,intervalWeights);
816: alp = (rho-rho1)/den;
817: srpol++;
818: scpol++;
819: PetscCall(Mat_AXPY_BLAS(srpol,nintv,-alp0,appol,ld,1.0,rpol,ld));
820: PetscCall(Mat_AXPY_BLAS(scpol,nintv,-alp,appol,ld,1.0,cpol,ld));
821: if (i+1 == niter) break;
822: sarpol++;
823: PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(rpol,srpol,ld,intv,nintv,arpol,ld));
824: rho0 = rho;
825: rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
826: bet = rho / rho0;
827: sppol++;
828: sappol++;
829: PetscCall(Mat_AXPY_BLAS(sppol,nintv,1.0,rpol,ld,bet,ppol,ld));
830: PetscCall(Mat_AXPY_BLAS(sappol,nintv,1.0,arpol,ld,bet,appol,ld));
831: }
832: PetscCall(PetscFree4(ppol,rpol,appol,arpol));
833: PetscFunctionReturn(PETSC_SUCCESS);
834: }
836: /*
837: FILTLAN function FilteredConjugateResidualMatrixPolynomialVectorProduct
839: this routine employs a conjugate-residual-type algorithm in polynomial space to compute
840: x = x0 + s(A)*r0 with r0 = b - A*x0, such that ||(1-P(z))-z*s(z)||_w is minimized, where
841: P(z) is a given piecewise polynomial, called the base filter,
842: s(z) is a polynomial of degree up to niter, the number of conjugate-residual iterations,
843: and b and x0 are given vectors
845: note that P(z) is expanded in the `translated' (scale-and-shift) Chebyshev basis for each interval,
846: and presented as a matrix of Chebyshev coefficients pp
848: input:
849: A is a sparse matrix
850: x0, b are vectors
851: niter is the number of conjugate-residual iterations
852: intv is a vector which defines the intervals; the j-th interval is [intv(j),intv(j+1))
853: w is a vector of Chebyshev weights; the weight of j-th interval is w(j)
854: the interval weights define the inner product of two continuous functions and then
855: the derived w-norm ||P(z)-Q(z)||_w
856: pp is a matrix of Chebyshev coefficients which defines the piecewise polynomial P(z)
857: to be specific, for z in [intv(j), intv(j+1)), P(z) equals
858: P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(niter+2,j)*S_{niter+1}(z),
859: where S_i(z) is the `translated' Chebyshev polynomial in that interval,
860: S_i((z-c)/h) = T_i(z), c = (intv(j)+intv(j+1))) / 2, h = (intv(j+1)-intv(j)) / 2,
861: with T_i(z) the Chebyshev polynomial of the first kind,
862: T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
863: tol is the tolerance; if the residual polynomial in z-norm is dropped by a factor lower
864: than tol, then stop the conjugate-residual iteration
866: output:
867: the return vector is x = x0 + s(A)*r0 with r0 = b - A*x0, such that ||(1-P(z))-z*s(z)||_w is minimized,
868: subject to that s(z) is a polynomial of degree up to niter, where P(z) is the base filter
869: in short, z*s(z) approximates 1-P(z)
871: note:
872: 1. since z*s(z) approximates 1-P(z), P(0)==1 is expected; if P(0)!=1, one can translate P(z)
873: for example, if P(0)==0, one can use 1-P(z) as input instead of P(z)
874: 2. typically, the base filter, defined by pp and intv, is from Hermite interpolation
875: in intervals [intv(j),intv(j+1)) for j=1,...,nintv, with nintv the number of intervals
876: 3. a common application is to compute R(A)*b, where R(z) approximates 1-P(z)
877: in this case, one can set x0 = 0 and then the return vector is x = s(A)*b, where
878: z*s(z) approximates 1-P(z); therefore, A*x is the wanted R(A)*b
879: */
880: static PetscErrorCode FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProduct(Mat A,Vec b,Vec x,PetscReal *baseFilter,PetscInt nbase,PetscReal *intv,PetscInt nintv,PetscReal *intervalWeights,PetscInt niter,Vec *work)881: {
882: PetscInt i,j,srpol,scpol,sarpol,sppol,sappol,ld;
883: PetscReal rho,rho0,rho00,rho1,den,bet,alp,alp0,*cpol,*ppol,*rpol,*appol,*arpol,tol=0.0;
884: Vec r=work[0],p=work[1],ap=work[2],w=work[3];
885: PetscScalar alpha;
887: PetscFunctionBegin;
888: ld = niter+3; /* leading dimension */
889: PetscCall(PetscCalloc5(ld*nintv,&ppol,ld*nintv,&rpol,ld*nintv,&cpol,ld*nintv,&appol,ld*nintv,&arpol));
890: /* initialize polynomial ppol to be 1 (i.e. multiplicative identity) in all intervals */
891: sppol = 2;
892: srpol = 2;
893: scpol = 2;
894: for (j=0;j<nintv;j++) {
895: ppol[j*ld] = 1.0;
896: rpol[j*ld] = 1.0;
897: cpol[j*ld] = 1.0;
898: }
899: /* ppol is the initial p-polynomial (corresponding to the A-conjugate vector p in CG)
900: rpol is the r-polynomial (corresponding to the residual vector r in CG)
901: cpol is the "corrected" residual polynomial */
902: sappol = 3;
903: sarpol = 3;
904: PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(ppol,sppol,ld,intv,nintv,appol,ld));
905: for (i=0;i<3;i++) for (j=0;j<nintv;j++) arpol[i+j*ld] = appol[i+j*ld];
906: rho00 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
907: rho = rho00;
909: /* corrected CR in vector space */
910: /* we assume x0 is always 0 */
911: PetscCall(VecSet(x,0.0));
912: PetscCall(VecCopy(b,r)); /* initial residual r = b-A*x0 */
913: PetscCall(VecCopy(r,p)); /* p = r */
914: PetscCall(MatMult(A,p,ap)); /* ap = A*p */
916: for (i=0;i<niter;i++) {
917: /* iteration in the polynomial space */
918: den = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(appol,sappol,nintv,ld,appol,sappol,nintv,ld,intervalWeights);
919: alp0 = rho/den;
920: rho1 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(baseFilter,nbase,nintv,nbase,appol,sappol,nintv,ld,intervalWeights);
921: alp = (rho-rho1)/den;
922: srpol++;
923: scpol++;
924: PetscCall(Mat_AXPY_BLAS(srpol,nintv,-alp0,appol,ld,1.0,rpol,ld));
925: PetscCall(Mat_AXPY_BLAS(scpol,nintv,-alp,appol,ld,1.0,cpol,ld));
926: sarpol++;
927: PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(rpol,srpol,ld,intv,nintv,arpol,ld));
928: rho0 = rho;
929: rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
931: /* update x in the vector space */
932: alpha = alp;
933: PetscCall(VecAXPY(x,alpha,p)); /* x += alp*p */
934: if (rho < tol*rho00) break;
936: /* finish the iteration in the polynomial space */
937: bet = rho / rho0;
938: sppol++;
939: sappol++;
940: PetscCall(Mat_AXPY_BLAS(sppol,nintv,1.0,rpol,ld,bet,ppol,ld));
941: PetscCall(Mat_AXPY_BLAS(sappol,nintv,1.0,arpol,ld,bet,appol,ld));
943: /* finish the iteration in the vector space */
944: alpha = -alp0;
945: PetscCall(VecAXPY(r,alpha,ap)); /* r -= alp0*ap */
946: alpha = bet;
947: PetscCall(VecAYPX(p,alpha,r)); /* p = r + bet*p */
948: PetscCall(MatMult(A,r,w)); /* ap = A*r + bet*ap */
949: PetscCall(VecAYPX(ap,alpha,w));
950: }
951: PetscCall(PetscFree5(ppol,rpol,cpol,appol,arpol));
952: PetscFunctionReturn(PETSC_SUCCESS);
953: }
955: /*
956: Gateway to FILTLAN for evaluating y=p(A)*x
957: */
958: static PetscErrorCode MatMult_FILTLAN(Mat A,Vec x,Vec y)959: {
960: ST st;
961: ST_FILTER *ctx;
962: PetscInt npoints;
964: PetscFunctionBegin;
965: PetscCall(MatShellGetContext(A,&st));
966: ctx = (ST_FILTER*)st->data;
967: npoints = (ctx->filterInfo->filterType == 2)? 6: 4;
968: PetscCall(FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProduct(ctx->T,x,y,ctx->baseFilter,2*ctx->baseDegree+2,ctx->intervals,npoints-1,ctx->opts->intervalWeights,ctx->polyDegree,st->work));
969: PetscCall(VecCopy(y,st->work[0]));
970: PetscCall(MatMult(ctx->T,st->work[0],y));
971: PetscFunctionReturn(PETSC_SUCCESS);
972: }
974: /* Block version of FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProduct */
975: static PetscErrorCode FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProductBlock(Mat A,Mat B,Mat C,PetscReal *baseFilter,PetscInt nbase,PetscReal *intv,PetscInt nintv,PetscReal *intervalWeights,PetscInt niter,Vec *work,Mat R,Mat P,Mat AP,Mat W)976: {
977: PetscInt i,j,srpol,scpol,sarpol,sppol,sappol,ld;
978: PetscReal rho,rho0,rho00,rho1,den,bet,alp,alp0,*cpol,*ppol,*rpol,*appol,*arpol,tol=0.0;
979: PetscScalar alpha;
981: PetscFunctionBegin;
982: ld = niter+3; /* leading dimension */
983: PetscCall(PetscCalloc5(ld*nintv,&ppol,ld*nintv,&rpol,ld*nintv,&cpol,ld*nintv,&appol,ld*nintv,&arpol));
984: /* initialize polynomial ppol to be 1 (i.e. multiplicative identity) in all intervals */
985: sppol = 2;
986: srpol = 2;
987: scpol = 2;
988: for (j=0;j<nintv;j++) {
989: ppol[j*ld] = 1.0;
990: rpol[j*ld] = 1.0;
991: cpol[j*ld] = 1.0;
992: }
993: /* ppol is the initial p-polynomial (corresponding to the A-conjugate vector p in CG)
994: rpol is the r-polynomial (corresponding to the residual vector r in CG)
995: cpol is the "corrected" residual polynomial */
996: sappol = 3;
997: sarpol = 3;
998: PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(ppol,sppol,ld,intv,nintv,appol,ld));
999: for (i=0;i<3;i++) for (j=0;j<nintv;j++) arpol[i+j*ld] = appol[i+j*ld];
1000: rho00 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
1001: rho = rho00;
1003: /* corrected CR in vector space */
1004: /* we assume x0 is always 0 */
1005: PetscCall(MatZeroEntries(C));
1006: PetscCall(MatCopy(B,R,SAME_NONZERO_PATTERN)); /* initial residual r = b-A*x0 */
1007: PetscCall(MatCopy(R,P,SAME_NONZERO_PATTERN)); /* p = r */
1008: PetscCall(MatMatMult(A,P,MAT_REUSE_MATRIX,PETSC_DEFAULT,&AP)); /* ap = A*p */
1010: for (i=0;i<niter;i++) {
1011: /* iteration in the polynomial space */
1012: den = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(appol,sappol,nintv,ld,appol,sappol,nintv,ld,intervalWeights);
1013: alp0 = rho/den;
1014: rho1 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(baseFilter,nbase,nintv,nbase,appol,sappol,nintv,ld,intervalWeights);
1015: alp = (rho-rho1)/den;
1016: srpol++;
1017: scpol++;
1018: PetscCall(Mat_AXPY_BLAS(srpol,nintv,-alp0,appol,ld,1.0,rpol,ld));
1019: PetscCall(Mat_AXPY_BLAS(scpol,nintv,-alp,appol,ld,1.0,cpol,ld));
1020: sarpol++;
1021: PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(rpol,srpol,ld,intv,nintv,arpol,ld));
1022: rho0 = rho;
1023: rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
1025: /* update x in the vector space */
1026: alpha = alp;
1027: PetscCall(MatAXPY(C,alpha,P,SAME_NONZERO_PATTERN)); /* x += alp*p */
1028: if (rho < tol*rho00) break;
1030: /* finish the iteration in the polynomial space */
1031: bet = rho / rho0;
1032: sppol++;
1033: sappol++;
1034: PetscCall(Mat_AXPY_BLAS(sppol,nintv,1.0,rpol,ld,bet,ppol,ld));
1035: PetscCall(Mat_AXPY_BLAS(sappol,nintv,1.0,arpol,ld,bet,appol,ld));
1037: /* finish the iteration in the vector space */
1038: alpha = -alp0;
1039: PetscCall(MatAXPY(R,alpha,AP,SAME_NONZERO_PATTERN)); /* r -= alp0*ap */
1040: alpha = bet;
1041: PetscCall(MatAYPX(P,alpha,R,SAME_NONZERO_PATTERN)); /* p = r + bet*p */
1042: PetscCall(MatMatMult(A,R,MAT_REUSE_MATRIX,PETSC_DEFAULT,&W)); /* ap = A*r + bet*ap */
1043: PetscCall(MatAYPX(AP,alpha,W,SAME_NONZERO_PATTERN));
1044: }
1045: PetscCall(PetscFree5(ppol,rpol,cpol,appol,arpol));
1046: PetscFunctionReturn(PETSC_SUCCESS);
1047: }
1049: /*
1050: Gateway to FILTLAN for evaluating C=p(A)*B
1051: */
1052: static PetscErrorCode MatMatMult_FILTLAN(Mat A,Mat B,Mat C,void *pctx)1053: {
1054: ST st;
1055: ST_FILTER *ctx;
1056: PetscInt i,m1,m2,npoints;
1058: PetscFunctionBegin;
1059: PetscCall(MatShellGetContext(A,&st));
1060: ctx = (ST_FILTER*)st->data;
1061: npoints = (ctx->filterInfo->filterType == 2)? 6: 4;
1062: if (ctx->nW) { /* check if work matrices must be resized */
1063: PetscCall(MatGetSize(B,NULL,&m1));
1064: PetscCall(MatGetSize(ctx->W[0],NULL,&m2));
1065: if (m1!=m2) {
1066: PetscCall(MatDestroyMatrices(ctx->nW,&ctx->W));
1067: ctx->nW = 0;
1068: }
1069: }
1070: if (!ctx->nW) { /* allocate work matrices */
1071: ctx->nW = 4;
1072: PetscCall(PetscMalloc1(ctx->nW,&ctx->W));
1073: for (i=0;i<ctx->nW;i++) PetscCall(MatDuplicate(B,MAT_DO_NOT_COPY_VALUES,&ctx->W[i]));
1074: }
1075: PetscCall(FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProductBlock(ctx->T,B,ctx->W[0],ctx->baseFilter,2*ctx->baseDegree+2,ctx->intervals,npoints-1,ctx->opts->intervalWeights,ctx->polyDegree,st->work,C,ctx->W[1],ctx->W[2],ctx->W[3]));
1076: PetscCall(MatMatMult(ctx->T,ctx->W[0],MAT_REUSE_MATRIX,PETSC_DEFAULT,&C));
1077: PetscFunctionReturn(PETSC_SUCCESS);
1078: }
1080: /*
1081: FILTLAN function PolynomialFilterInterface::setFilter
1083: Creates the shifted (and scaled) matrix and the base filter P(z).
1084: M is a shell matrix whose MatMult() applies the filter.
1085: */
1086: PetscErrorCode STFilter_FILTLAN_setFilter(ST st,Mat *G)1087: {
1088: ST_FILTER *ctx = (ST_FILTER*)st->data;
1089: PetscInt i,npoints,n,m,N,M;
1090: PetscReal frame2[4];
1091: PetscScalar alpha;
1092: const PetscInt HighLowFlags[5] = { 1, -1, 0, -1, 1 };
1094: PetscFunctionBegin;
1095: if (ctx->frame[0] == ctx->frame[1]) { /* low pass filter, convert it to high pass filter */
1096: /* T = frame[3]*eye(n) - A */
1097: PetscCall(MatDestroy(&ctx->T));
1098: PetscCall(MatDuplicate(st->A[0],MAT_COPY_VALUES,&ctx->T));
1099: PetscCall(MatScale(ctx->T,-1.0));
1100: alpha = ctx->frame[3];
1101: PetscCall(MatShift(ctx->T,alpha));
1102: for (i=0;i<4;i++) frame2[i] = ctx->frame[3] - ctx->frame[3-i];
1103: } else { /* it can be a mid-pass filter or a high-pass filter */
1104: if (ctx->frame[0] == 0.0) {
1105: PetscCall(PetscObjectReference((PetscObject)st->A[0]));
1106: PetscCall(MatDestroy(&ctx->T));
1107: ctx->T = st->A[0];
1108: for (i=0;i<4;i++) frame2[i] = ctx->frame[i];
1109: } else {
1110: /* T = A - frame[0]*eye(n) */
1111: PetscCall(MatDestroy(&ctx->T));
1112: PetscCall(MatDuplicate(st->A[0],MAT_COPY_VALUES,&ctx->T));
1113: alpha = -ctx->frame[0];
1114: PetscCall(MatShift(ctx->T,alpha));
1115: for (i=0;i<4;i++) frame2[i] = ctx->frame[i] - ctx->frame[0];
1116: }
1117: }
1119: /* no need to recompute filter if the parameters did not change */
1120: if (st->state==ST_STATE_INITIAL || ctx->filtch) {
1121: PetscCall(FILTLAN_GetIntervals(ctx->intervals,frame2,ctx->polyDegree,ctx->baseDegree,ctx->opts,ctx->filterInfo));
1122: /* translate the intervals back */
1123: if (ctx->frame[0] == ctx->frame[1]) { /* low pass filter, convert it to high pass filter */
1124: for (i=0;i<4;i++) ctx->intervals2[i] = ctx->frame[3] - ctx->intervals[3-i];
1125: } else { /* it can be a mid-pass filter or a high-pass filter */
1126: if (ctx->frame[0] == 0.0) {
1127: for (i=0;i<6;i++) ctx->intervals2[i] = ctx->intervals[i];
1128: } else {
1129: for (i=0;i<6;i++) ctx->intervals2[i] = ctx->intervals[i] + ctx->frame[0];
1130: }
1131: }
1133: npoints = (ctx->filterInfo->filterType == 2)? 6: 4;
1134: PetscCall(PetscFree(ctx->baseFilter));
1135: PetscCall(PetscMalloc1((2*ctx->baseDegree+2)*(npoints-1),&ctx->baseFilter));
1136: PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(ctx->baseFilter,ctx->intervals,npoints,HighLowFlags,ctx->baseDegree));
1137: PetscCall(PetscInfo(st,"Computed value of yLimit = %g\n",(double)ctx->filterInfo->yLimit));
1138: }
1139: ctx->filtch = PETSC_FALSE;
1141: /* create shell matrix*/
1142: if (!*G) {
1143: PetscCall(MatGetSize(ctx->T,&N,&M));
1144: PetscCall(MatGetLocalSize(ctx->T,&n,&m));
1145: PetscCall(MatCreateShell(PetscObjectComm((PetscObject)st),n,m,N,M,st,G));
1146: PetscCall(MatShellSetOperation(*G,MATOP_MULT,(void(*)(void))MatMult_FILTLAN));
1147: PetscCall(MatShellSetMatProductOperation(*G,MATPRODUCT_AB,NULL,MatMatMult_FILTLAN,NULL,MATDENSE,MATDENSE));
1148: PetscCall(MatShellSetMatProductOperation(*G,MATPRODUCT_AB,NULL,MatMatMult_FILTLAN,NULL,MATDENSECUDA,MATDENSECUDA));
1149: }
1150: PetscFunctionReturn(PETSC_SUCCESS);
1151: }