Line data Source code
1 : /*
2 : - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3 : SLEPc - Scalable Library for Eigenvalue Problem Computations
4 : Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
5 :
6 : This file is part of SLEPc.
7 : SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8 : - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9 : */
10 : /*
11 : This file is an adaptation of several subroutines from FILTLAN, the
12 : Filtered Lanczos Package, authored by Haw-ren Fang and Yousef Saad.
13 :
14 : More information at:
15 : https://www-users.cs.umn.edu/~saad/software/filtlan
16 :
17 : References:
18 :
19 : [1] H. Fang and Y. Saad, "A filtered Lanczos procedure for extreme and interior
20 : eigenvalue problems", SIAM J. Sci. Comput. 34(4):A2220-A2246, 2012.
21 : */
22 :
23 : #include <slepc/private/stimpl.h>
24 : #include "filter.h"
25 :
26 : static PetscErrorCode FILTLAN_FilteredConjugateResidualPolynomial(PetscReal*,PetscReal*,PetscInt,PetscReal*,PetscInt,PetscReal*,PetscInt);
27 : static PetscReal FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(PetscReal*,PetscInt,PetscReal*,PetscInt,PetscReal);
28 : static PetscErrorCode FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(PetscInt,PetscReal,PetscReal,PetscReal*,PetscReal*,PetscReal*,PetscReal*);
29 :
30 : /* ////////////////////////////////////////////////////////////////////////////
31 : // Newton - Hermite Polynomial Interpolation
32 : //////////////////////////////////////////////////////////////////////////// */
33 :
34 : /*
35 : FILTLAN function NewtonPolynomial
36 :
37 : build P(z) by Newton's divided differences in the form
38 : P(z) = a(1) + a(2)*(z-x(1)) + a(3)*(z-x(1))*(z-x(2)) + ... + a(n)*(z-x(1))*...*(z-x(n-1)),
39 : such that P(x(i)) = y(i) for i=1,...,n, where
40 : x,y are input vectors of length n, and a is the output vector of length n
41 : if x(i)==x(j) for some i!=j, then it is assumed that the derivative of P(z) is to be zero at x(i),
42 : and the Hermite polynomial interpolation is applied
43 : in general, if there are k x(i)'s with the same value x0, then
44 : the j-th order derivative of P(z) is zero at z=x0 for j=1,...,k-1
45 : */
46 1764 : static inline PetscErrorCode FILTLAN_NewtonPolynomial(PetscInt n,PetscReal *x,PetscReal *y,PetscReal *sa,PetscReal *sf)
47 : {
48 1764 : PetscReal d,*sx=x,*sy=y;
49 1764 : PetscInt j,k;
50 :
51 1764 : PetscFunctionBegin;
52 1764 : PetscCall(PetscArraycpy(sf,sy,n));
53 :
54 : /* apply Newton's finite difference method */
55 1764 : sa[0] = sf[0];
56 38808 : for (j=1;j<n;j++) {
57 444528 : for (k=n-1;k>=j;k--) {
58 407484 : d = sx[k]-sx[k-j];
59 407484 : if (PetscUnlikely(d == 0.0)) sf[k] = 0.0; /* assume that the derivative is 0.0 and apply the Hermite interpolation */
60 196262 : else sf[k] = (sf[k]-sf[k-1]) / d;
61 : }
62 37044 : sa[j] = sf[j];
63 : }
64 1764 : PetscFunctionReturn(PETSC_SUCCESS);
65 : }
66 :
67 : /*
68 : FILTLAN function HermiteBaseFilterInChebyshevBasis
69 :
70 : compute a base filter P(z) which is a continuous, piecewise polynomial P(z) expanded
71 : in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials in each interval
72 :
73 : The base filter P(z) equals P_j(z) for z in the j-th interval [intv(j), intv(j+1)), where
74 : P_j(z) a Hermite interpolating polynomial
75 :
76 : input:
77 : intv is a vector which defines the intervals; the j-th interval is [intv(j), intv(j+1))
78 : HiLowFlags determines the shape of the base filter P(z)
79 : Consider the j-th interval [intv(j), intv(j+1)]
80 : HighLowFlag[j-1]==1, P(z)==1 for z in [intv(j), intv(j+1)]
81 : ==0, P(z)==0 for z in [intv(j), intv(j+1)]
82 : ==-1, [intv(j), intv(j+1)] is a transition interval;
83 : P(intv(j)) and P(intv(j+1)) are defined such that P(z) is continuous
84 : baseDeg is the degree of smoothness of the Hermite (piecewise polynomial) interpolation
85 : to be precise, the i-th derivative of P(z) is zero, i.e. d^{i}P(z)/dz^i==0, at all interval
86 : end points z=intv(j) for i=1,...,baseDeg
87 :
88 : output:
89 : P(z) expanded in a basis of `translated' (scale-and-shift) Chebyshev polynomials
90 : to be precise, for z in the j-th interval [intv(j),intv(j+1)), P(z) equals
91 : P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
92 : where S_i(z) is the `translated' Chebyshev polynomial in that interval,
93 : S_i((z-c)/h) = T_i(z), c = (intv(j)+intv(j+1))) / 2, h = (intv(j+1)-intv(j)) / 2,
94 : with T_i(z) the Chebyshev polynomial of the first kind,
95 : T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
96 : the return matrix is the matrix of Chebyshev coefficients pp just described
97 :
98 : note that the degree of P(z) in each interval is (at most) 2*baseDeg+1, with 2*baseDeg+2 coefficients
99 : let n be the length of intv; then there are n-1 intervals
100 : therefore the return matrix pp is of size (2*baseDeg+2)-by-(n-1)
101 : */
102 882 : static PetscErrorCode FILTLAN_HermiteBaseFilterInChebyshevBasis(PetscReal *baseFilter,PetscReal *intv,PetscInt npoints,const PetscInt *HighLowFlags,PetscInt baseDeg)
103 : {
104 882 : PetscInt m,ii,jj;
105 882 : PetscReal flag,flag0,flag2,aa,bb,*px,*py,*sx,*sy,*pp,*qq,*sq,*sbf,*work,*currentPoint = intv;
106 882 : const PetscInt *hilo = HighLowFlags;
107 :
108 882 : PetscFunctionBegin;
109 882 : m = 2*baseDeg+2;
110 882 : jj = npoints-1; /* jj is initialized as the number of intervals */
111 882 : PetscCall(PetscMalloc5(m,&px,m,&py,m,&pp,m,&qq,m,&work));
112 : sbf = baseFilter;
113 :
114 5292 : while (jj--) { /* the main loop to compute the Chebyshev coefficients */
115 :
116 4410 : flag = (PetscReal)(*hilo++); /* get the flag of the current interval */
117 4410 : if (flag == -1.0) { /* flag == -1 means that the current interval is a transition polynomial */
118 :
119 1764 : flag2 = (PetscReal)(*hilo); /* get flag2, the flag of the next interval */
120 1764 : flag0 = 1.0-flag2; /* the flag of the previous interval is 1-flag2 */
121 :
122 : /* two pointers for traversing x[] and y[] */
123 1764 : sx = px;
124 1764 : sy = py;
125 :
126 : /* find the current interval [aa,bb] */
127 1764 : aa = *currentPoint++;
128 1764 : bb = *currentPoint;
129 :
130 : /* now left-hand side */
131 1764 : ii = baseDeg+1;
132 21168 : while (ii--) {
133 19404 : *sy++ = flag0;
134 19404 : *sx++ = aa;
135 : }
136 :
137 : /* now right-hand side */
138 : ii = baseDeg+1;
139 21168 : while (ii--) {
140 19404 : *sy++ = flag2;
141 19404 : *sx++ = bb;
142 : }
143 :
144 : /* build a Newton polynomial (indeed, the generalized Hermite interpolating polynomial) with x[] and y[] */
145 1764 : PetscCall(FILTLAN_NewtonPolynomial(m,px,py,pp,work));
146 :
147 : /* pp contains coefficients of the Newton polynomial P(z) in the current interval [aa,bb], where
148 : P(z) = pp(1) + pp(2)*(z-px(1)) + pp(3)*(z-px(1))*(z-px(2)) + ... + pp(n)*(z-px(1))*...*(z-px(n-1)) */
149 :
150 : /* translate the Newton coefficients to the Chebyshev coefficients */
151 1764 : PetscCall(FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(m,aa,bb,pp,px,qq,work));
152 : /* qq contains coefficients of the polynomial in [aa,bb] in the `translated' Chebyshev basis */
153 :
154 : /* copy the Chebyshev coefficients to baseFilter
155 : OCTAVE/MATLAB: B(:,j) = qq, where j = (npoints-1)-jj and B is the return matrix */
156 1764 : sq = qq;
157 1764 : ii = 2*baseDeg+2;
158 40572 : while (ii--) *sbf++ = *sq++;
159 :
160 : } else {
161 :
162 : /* a constant polynomial P(z)=flag, where either flag==0 or flag==1
163 : OCTAVE/MATLAB: B(1,j) = flag, where j = (npoints-1)-jj and B is the return matrix */
164 2646 : *sbf++ = flag;
165 :
166 : /* the other coefficients are all zero, since P(z) is a constant
167 : OCTAVE/MATLAB: B(1,j) = 0, where j = (npoints-1)-jj and B is the return matrix */
168 2646 : ii = 2*baseDeg+1;
169 58212 : while (ii--) *sbf++ = 0.0;
170 :
171 : /* for the next point */
172 2646 : currentPoint++;
173 : }
174 : }
175 882 : PetscCall(PetscFree5(px,py,pp,qq,work));
176 882 : PetscFunctionReturn(PETSC_SUCCESS);
177 : }
178 :
179 : /* ////////////////////////////////////////////////////////////////////////////
180 : // Base Filter
181 : //////////////////////////////////////////////////////////////////////////// */
182 :
183 : /*
184 : FILTLAN function GetIntervals
185 :
186 : this routine determines the intervals (including the transition one(s)) by an iterative process
187 :
188 : frame is a vector consisting of 4 ordered elements:
189 : [frame(1),frame(4)] is the interval which (tightly) contains all eigenvalues, and
190 : [frame(2),frame(3)] is the interval in which the eigenvalues are sought
191 : baseDeg is the left-and-right degree of the base filter for each interval
192 : polyDeg is the (maximum possible) degree of s(z), with z*s(z) the polynomial filter
193 : intv is the output; the j-th interval is [intv(j),intv(j+1))
194 : opts is a collection of interval options
195 :
196 : the base filter P(z) is a piecewise polynomial from Hermite interpolation with degree baseDeg
197 : at each end point of intervals
198 :
199 : the polynomial filter Q(z) is in the form z*s(z), i.e. Q(0)==0, such that ||(1-P(z))-z*s(z)||_w is
200 : minimized with s(z) a polynomial of degree up to polyDeg
201 :
202 : the resulting polynomial filter Q(z) satisfies Q(x)>=Q(y) for x in [frame[1],frame[2]], and
203 : y in [frame[0],frame[3]] but not in [frame[1],frame[2]]
204 :
205 : the routine fills a PolynomialFilterInfo struct which gives some information of the polynomial filter
206 : */
207 7 : static PetscErrorCode FILTLAN_GetIntervals(PetscReal *intervals,PetscReal *frame,PetscInt polyDeg,PetscInt baseDeg,FILTLAN_IOP opts,FILTLAN_PFI filterInfo)
208 : {
209 7 : PetscReal intv[6],x,y,z1,z2,c,c1,c2,fc,fc2,halfPlateau,leftDelta,rightDelta,gridSize;
210 7 : PetscReal yLimit,ySummit,yLeftLimit,yRightLimit,bottom,qIndex,*baseFilter,*polyFilter;
211 7 : PetscReal yLimitGap=0.0,yLeftSummit=0.0,yLeftBottom=0.0,yRightSummit=0.0,yRightBottom=0.0;
212 7 : PetscInt i,ii,npoints,numIter,numLeftSteps=1,numRightSteps=1,numMoreLooked=0;
213 7 : PetscBool leftBoundaryMet=PETSC_FALSE,rightBoundaryMet=PETSC_FALSE,stepLeft,stepRight;
214 7 : const PetscReal a=frame[0],a1=frame[1],b1=frame[2],b=frame[3];
215 7 : const PetscInt HighLowFlags[5] = { 1, -1, 0, -1, 1 }; /* if filterType is 1, only first 3 elements will be used */
216 7 : const PetscInt numLookMore = 2*(PetscInt)(0.5+(PetscLogReal(2.0)/PetscLogReal(opts->shiftStepExpanRate)));
217 :
218 7 : PetscFunctionBegin;
219 7 : PetscCheck(a<=a1 && a1<=b1 && b1<=b,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Values in the frame vector should be non-decreasing");
220 7 : PetscCheck(a1!=b1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"The range of wanted eigenvalues cannot be of size zero");
221 7 : filterInfo->filterType = 2; /* mid-pass filter, for interior eigenvalues */
222 7 : if (b == b1) {
223 0 : PetscCheck(a!=a1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"A polynomial filter should not cover all eigenvalues");
224 0 : filterInfo->filterType = 1; /* high-pass filter, for largest eigenvalues */
225 7 : } else PetscCheck(a!=a1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"filterType==3 for smallest eigenvalues should be pre-converted to filterType==1 for largest eigenvalues");
226 :
227 : /* the following recipe follows Yousef Saad (2005, 2006) with a few minor adaptations / enhancements */
228 7 : halfPlateau = 0.5*(b1-a1)*opts->initialPlateau; /* half length of the "plateau" of the (dual) base filter */
229 7 : leftDelta = (b1-a1)*opts->initialShiftStep; /* initial left shift */
230 7 : rightDelta = leftDelta; /* initial right shift */
231 7 : opts->numGridPoints = PetscMax(opts->numGridPoints,(PetscInt)(2.0*(b-a)/halfPlateau));
232 7 : gridSize = (b-a) / (PetscReal)opts->numGridPoints;
233 :
234 49 : for (i=0;i<6;i++) intv[i] = 0.0;
235 7 : if (filterInfo->filterType == 2) { /* for interior eigenvalues */
236 7 : npoints = 6;
237 7 : intv[0] = a;
238 7 : intv[5] = b;
239 : /* intv[1], intv[2], intv[3], and intv[4] to be determined */
240 : } else { /* filterType == 1 (or 3 with conversion), for extreme eigenvalues */
241 0 : npoints = 4;
242 0 : intv[0] = a;
243 0 : intv[3] = b;
244 : /* intv[1], and intv[2] to be determined */
245 : }
246 7 : z1 = a1 - leftDelta;
247 7 : z2 = b1 + rightDelta;
248 7 : filterInfo->filterOK = 0; /* not yet found any OK filter */
249 :
250 : /* allocate matrices */
251 7 : PetscCall(PetscMalloc2((polyDeg+2)*(npoints-1),&polyFilter,(2*baseDeg+2)*(npoints-1),&baseFilter));
252 :
253 : /* initialize the intervals, mainly for the case opts->maxOuterIter == 0 */
254 7 : intervals[0] = intv[0];
255 7 : intervals[3] = intv[3];
256 7 : intervals[5] = intv[5];
257 7 : intervals[1] = z1;
258 7 : if (filterInfo->filterType == 2) { /* a mid-pass filter for interior eigenvalues */
259 7 : intervals[4] = z2;
260 7 : c = (a1+b1) / 2.0;
261 7 : intervals[2] = c - halfPlateau;
262 7 : intervals[3] = c + halfPlateau;
263 : } else { /* filterType == 1 (or 3 with conversion) for extreme eigenvalues */
264 0 : intervals[2] = z1 + (b1-z1)*opts->transIntervalRatio;
265 : }
266 :
267 : /* the main loop */
268 71 : for (numIter=1;numIter<=opts->maxOuterIter;numIter++) {
269 71 : if (z1 <= a) { /* outer loop updates z1 and z2 */
270 18 : z1 = a;
271 18 : leftBoundaryMet = PETSC_TRUE;
272 : }
273 71 : if (filterInfo->filterType == 2) { /* a <= z1 < (a1) */
274 71 : if (z2 >= b) { /* a mid-pass filter for interior eigenvalues */
275 0 : z2 = b;
276 0 : rightBoundaryMet = PETSC_TRUE;
277 : }
278 : /* a <= z1 < c-h < c+h < z2 <= b, where h is halfPlateau */
279 : /* [z1, c-h] and [c+h, z2] are transition interval */
280 71 : intv[1] = z1;
281 71 : intv[4] = z2;
282 71 : c1 = z1 + halfPlateau;
283 71 : intv[2] = z1; /* i.e. c1 - halfPlateau */
284 71 : intv[3] = c1 + halfPlateau;
285 71 : PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg));
286 71 : PetscCall(FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg));
287 : /* fc1 = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1); */
288 71 : c2 = z2 - halfPlateau;
289 71 : intv[2] = c2 - halfPlateau;
290 71 : intv[3] = z2; /* i.e. c2 + halfPlateau */
291 71 : PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg));
292 71 : PetscCall(FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg));
293 71 : fc2 = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1);
294 71 : yLimitGap = PETSC_MAX_REAL;
295 71 : ii = opts->maxInnerIter;
296 804 : while (ii-- && !(yLimitGap <= opts->yLimitTol)) {
297 : /* recursive bisection to get c such that p(a1) are p(b1) approximately the same */
298 733 : c = (c1+c2) / 2.0;
299 733 : intv[2] = c - halfPlateau;
300 733 : intv[3] = c + halfPlateau;
301 733 : PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg));
302 733 : PetscCall(FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg));
303 733 : fc = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1);
304 733 : if (fc*fc2 < 0.0) {
305 : c1 = c;
306 : /* fc1 = fc; */
307 : } else {
308 391 : c2 = c;
309 391 : fc2 = fc;
310 : }
311 733 : yLimitGap = PetscAbsReal(fc);
312 : }
313 : } else { /* filterType == 1 (or 3 with conversion) for extreme eigenvalues */
314 0 : intv[1] = z1;
315 0 : intv[2] = z1 + (b1-z1)*opts->transIntervalRatio;
316 0 : PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,4,HighLowFlags,baseDeg));
317 0 : PetscCall(FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,4,opts->intervalWeights,polyDeg));
318 : }
319 : /* polyFilter contains the coefficients of the polynomial filter which approximates phi(x)
320 : expanded in the `translated' Chebyshev basis */
321 : /* psi(x) = 1.0 - phi(x) is the dual base filter approximated by a polynomial in the form x*p(x) */
322 71 : yLeftLimit = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1);
323 71 : yRightLimit = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1);
324 71 : yLimit = (yLeftLimit < yRightLimit) ? yLeftLimit : yRightLimit;
325 71 : ySummit = (yLeftLimit > yRightLimit) ? yLeftLimit : yRightLimit;
326 : x = a1;
327 2840 : while ((x+=gridSize) < b1) {
328 2769 : y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
329 2769 : if (y < yLimit) yLimit = y;
330 2769 : if (y > ySummit) ySummit = y;
331 : }
332 : /* now yLimit is the minimum of x*p(x) for x in [a1, b1] */
333 71 : stepLeft = PETSC_FALSE;
334 71 : stepRight = PETSC_FALSE;
335 71 : if ((yLimit < yLeftLimit && yLimit < yRightLimit) || yLimit < opts->yBottomLine) {
336 : /* very bad, step to see what will happen */
337 25 : stepLeft = PETSC_TRUE;
338 25 : if (filterInfo->filterType == 2) stepRight = PETSC_TRUE;
339 46 : } else if (filterInfo->filterType == 2) {
340 46 : if (yLeftLimit < yRightLimit) {
341 21 : if (yRightLimit-yLeftLimit > opts->yLimitTol) stepLeft = PETSC_TRUE;
342 25 : } else if (yLeftLimit-yRightLimit > opts->yLimitTol) stepRight = PETSC_TRUE;
343 : }
344 50 : if (!stepLeft) {
345 : yLeftBottom = yLeftLimit;
346 : x = a1;
347 2100 : while ((x-=gridSize) >= a) {
348 2095 : y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
349 2095 : if (y < yLeftBottom) yLeftBottom = y;
350 41 : else if (y > yLeftBottom) break;
351 : }
352 : yLeftSummit = yLeftBottom;
353 119410 : while ((x-=gridSize) >= a) {
354 119364 : y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
355 119364 : if (y > yLeftSummit) {
356 1713 : yLeftSummit = y;
357 1713 : if (yLeftSummit > yLimit*opts->yRippleLimit) {
358 : stepLeft = PETSC_TRUE;
359 : break;
360 : }
361 : }
362 119364 : if (y < yLeftBottom) yLeftBottom = y;
363 : }
364 : }
365 71 : if (filterInfo->filterType == 2 && !stepRight) {
366 : yRightBottom = yRightLimit;
367 : x = b1;
368 2265 : while ((x+=gridSize) <= b) {
369 2265 : y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
370 2265 : if (y < yRightBottom) yRightBottom = y;
371 46 : else if (y > yRightBottom) break;
372 : }
373 : yRightSummit = yRightBottom;
374 97057 : while ((x+=gridSize) <= b) {
375 97011 : y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
376 97011 : if (y > yRightSummit) {
377 1733 : yRightSummit = y;
378 1733 : if (yRightSummit > yLimit*opts->yRippleLimit) {
379 : stepRight = PETSC_TRUE;
380 : break;
381 : }
382 : }
383 97011 : if (y < yRightBottom) yRightBottom = y;
384 : }
385 : }
386 71 : if (!stepLeft && !stepRight) {
387 46 : if (filterInfo->filterType == 2) bottom = PetscMin(yLeftBottom,yRightBottom);
388 : else bottom = yLeftBottom;
389 46 : qIndex = 1.0 - (yLimit-bottom) / (ySummit-bottom);
390 46 : if (filterInfo->filterOK == 0 || filterInfo->filterQualityIndex < qIndex) {
391 : /* found the first OK filter or a better filter */
392 126 : for (i=0;i<6;i++) intervals[i] = intv[i];
393 18 : filterInfo->filterOK = 1;
394 18 : filterInfo->filterQualityIndex = qIndex;
395 18 : filterInfo->numIter = numIter;
396 18 : filterInfo->yLimit = yLimit;
397 18 : filterInfo->ySummit = ySummit;
398 18 : filterInfo->numLeftSteps = numLeftSteps;
399 18 : filterInfo->yLeftSummit = yLeftSummit;
400 18 : filterInfo->yLeftBottom = yLeftBottom;
401 18 : if (filterInfo->filterType == 2) {
402 18 : filterInfo->yLimitGap = yLimitGap;
403 18 : filterInfo->numRightSteps = numRightSteps;
404 18 : filterInfo->yRightSummit = yRightSummit;
405 18 : filterInfo->yRightBottom = yRightBottom;
406 : }
407 : numMoreLooked = 0;
408 28 : } else if (++numMoreLooked == numLookMore) {
409 : /* filter has been optimal */
410 7 : filterInfo->filterOK = 2;
411 7 : break;
412 : }
413 : /* try stepping further to see whether it can improve */
414 39 : stepLeft = PETSC_TRUE;
415 39 : if (filterInfo->filterType == 2) stepRight = PETSC_TRUE;
416 : }
417 : /* check whether we can really "step" */
418 64 : if (leftBoundaryMet) {
419 14 : if (filterInfo->filterType == 1 || rightBoundaryMet) break; /* cannot step further, so break the loop */
420 14 : if (stepLeft) {
421 : /* cannot step left, so try stepping right */
422 14 : stepLeft = PETSC_FALSE;
423 14 : stepRight = PETSC_TRUE;
424 : }
425 : }
426 64 : if (rightBoundaryMet && stepRight) {
427 : /* cannot step right, so try stepping left */
428 : stepRight = PETSC_FALSE;
429 : stepLeft = PETSC_TRUE;
430 : }
431 : /* now "step" */
432 64 : if (stepLeft) {
433 50 : numLeftSteps++;
434 50 : if (filterInfo->filterType == 2) leftDelta *= opts->shiftStepExpanRate; /* expand the step for faster convergence */
435 50 : z1 -= leftDelta;
436 : }
437 64 : if (stepRight) {
438 64 : numRightSteps++;
439 64 : rightDelta *= opts->shiftStepExpanRate; /* expand the step for faster convergence */
440 64 : z2 += rightDelta;
441 : }
442 64 : if (filterInfo->filterType == 2) {
443 : /* shrink the "plateau" of the (dual) base filter */
444 64 : if (stepLeft && stepRight) halfPlateau /= opts->plateauShrinkRate;
445 14 : else halfPlateau /= PetscSqrtReal(opts->plateauShrinkRate);
446 : }
447 : }
448 7 : PetscCheck(filterInfo->filterOK,PETSC_COMM_SELF,PETSC_ERR_CONV_FAILED,"STFILTER cannot get the filter specified; please adjust your filter parameters (e.g. increasing the polynomial degree)");
449 :
450 7 : filterInfo->totalNumIter = numIter;
451 7 : PetscCall(PetscFree2(polyFilter,baseFilter));
452 7 : PetscFunctionReturn(PETSC_SUCCESS);
453 : }
454 :
455 : /* ////////////////////////////////////////////////////////////////////////////
456 : // Chebyshev Polynomials
457 : //////////////////////////////////////////////////////////////////////////// */
458 :
459 : /*
460 : FILTLAN function ExpandNewtonPolynomialInChebyshevBasis
461 :
462 : translate the coefficients of a Newton polynomial to the coefficients
463 : in a basis of the `translated' (scale-and-shift) Chebyshev polynomials
464 :
465 : input:
466 : a Newton polynomial defined by vectors a and x:
467 : P(z) = a(1) + a(2)*(z-x(1)) + a(3)*(z-x(1))*(z-x(2)) + ... + a(n)*(z-x(1))*...*(z-x(n-1))
468 : the interval [aa,bb] defines the `translated' Chebyshev polynomials S_i(z) = T_i((z-c)/h),
469 : where c=(aa+bb)/2 and h=(bb-aa)/2, and T_i is the Chebyshev polynomial of the first kind
470 : note that T_i is defined by T_0(z)=1, T_1(z)=z, and T_i(z)=2*z*T_{i-1}(z)+T_{i-2}(z) for i>=2
471 :
472 : output:
473 : a vector q containing the Chebyshev coefficients:
474 : P(z) = q(1)*S_0(z) + q(2)*S_1(z) + ... + q(n)*S_{n-1}(z)
475 : */
476 1764 : static inline PetscErrorCode FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(PetscInt n,PetscReal aa,PetscReal bb,PetscReal *a,PetscReal *x,PetscReal *q,PetscReal *q2)
477 : {
478 1764 : PetscInt m,mm;
479 1764 : PetscReal *sa,*sx,*sq,*sq2,c,c2,h,h2;
480 :
481 1764 : PetscFunctionBegin;
482 1764 : sa = a+n; /* pointers for traversing a and x */
483 1764 : sx = x+n-1;
484 1764 : *q = *--sa; /* set q[0] = a(n) */
485 :
486 1764 : c = (aa+bb)/2.0;
487 1764 : h = (bb-aa)/2.0;
488 1764 : h2 = h/2.0;
489 :
490 38808 : for (m=1;m<=n-1;m++) { /* the main loop for translation */
491 :
492 : /* compute q2[0:m-1] = (c-x[n-m-1])*q[0:m-1] */
493 37044 : mm = m;
494 37044 : sq = q;
495 37044 : sq2 = q2;
496 37044 : c2 = c-(*--sx);
497 444528 : while (mm--) *(sq2++) = c2*(*sq++);
498 37044 : *sq2 = 0.0; /* q2[m] = 0.0 */
499 37044 : *(q2+1) += h*(*q); /* q2[1] = q2[1] + h*q[0] */
500 :
501 : /* compute q2[0:m-2] = q2[0:m-2] + h2*q[1:m-1] */
502 37044 : mm = m-1;
503 37044 : sq2 = q2;
504 37044 : sq = q+1;
505 407484 : while (mm--) *(sq2++) += h2*(*sq++);
506 :
507 : /* compute q2[2:m] = q2[2:m] + h2*q[1:m-1] */
508 37044 : mm = m-1;
509 37044 : sq2 = q2+2;
510 37044 : sq = q+1;
511 407484 : while (mm--) *(sq2++) += h2*(*sq++);
512 :
513 : /* compute q[0:m] = q2[0:m] */
514 37044 : mm = m+1;
515 37044 : sq2 = q2;
516 37044 : sq = q;
517 481572 : while (mm--) *sq++ = *sq2++;
518 37044 : *q += (*--sa); /* q[0] = q[0] + p[n-m-1] */
519 : }
520 1764 : PetscFunctionReturn(PETSC_SUCCESS);
521 : }
522 :
523 : /*
524 : FILTLAN function PolynomialEvaluationInChebyshevBasis
525 :
526 : evaluate P(z) at z=z0, where P(z) is a polynomial expanded in a basis of
527 : the `translated' (i.e. scale-and-shift) Chebyshev polynomials
528 :
529 : input:
530 : c is a vector of Chebyshev coefficients which defines the polynomial
531 : P(z) = c(1)*S_0(z) + c(2)*S_1(z) + ... + c(n)*S_{n-1}(z),
532 : where S_i is the `translated' Chebyshev polynomial S_i((z-c)/h) = T_i(z), with
533 : c = (intv(j)+intv(j+1)) / 2, h = (intv(j+1)-intv(j)) / 2
534 : note that T_i(z) is the Chebyshev polynomial of the first kind,
535 : T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
536 :
537 : output:
538 : the evaluated value of P(z) at z=z0
539 : */
540 225254 : static inline PetscReal FILTLAN_PolynomialEvaluationInChebyshevBasis(PetscReal *pp,PetscInt m,PetscInt idx,PetscReal z0,PetscReal aa,PetscReal bb)
541 : {
542 225254 : PetscInt ii,deg1;
543 225254 : PetscReal y,zz,t0,t1,t2,*sc;
544 :
545 225254 : PetscFunctionBegin;
546 225254 : deg1 = m;
547 225254 : if (aa==-1.0 && bb==1.0) zz = z0; /* treat it as a special case to reduce rounding errors */
548 225254 : else zz = (aa==bb)? 0.0 : -1.0+2.0*(z0-aa)/(bb-aa);
549 :
550 : /* compute y = P(z0), where we utilize the Chebyshev recursion */
551 225254 : sc = pp+(idx-1)*m; /* sc points to column idx of pp */
552 225254 : y = *sc++;
553 225254 : t1 = 1.0; t2 = zz;
554 225254 : ii = deg1-1;
555 44395058 : while (ii--) {
556 : /* Chebyshev recursion: T_0(zz)=1, T_1(zz)=zz, and T_{i+1}(zz) = 2*zz*T_i(zz) + T_{i-1}(zz) for i>=2
557 : the values of T_{i+1}(zz), T_i(zz), T_{i-1}(zz) are stored in t0, t1, t2, respectively */
558 44169804 : t0 = 2*zz*t1 - t2;
559 : /* it also works for the base case / the first iteration, where t0 equals 2*zz*1-zz == zz which is T_1(zz) */
560 44169804 : t2 = t1;
561 44169804 : t1 = t0;
562 44169804 : y += (*sc++)*t0;
563 : }
564 225254 : PetscFunctionReturn(y);
565 : }
566 :
567 : #define basisTranslated PETSC_TRUE
568 : /*
569 : FILTLAN function PiecewisePolynomialEvaluationInChebyshevBasis
570 :
571 : evaluate P(z) at z=z0, where P(z) is a piecewise polynomial expanded
572 : in a basis of the (optionally translated, i.e. scale-and-shift) Chebyshev polynomials for each interval
573 :
574 : input:
575 : intv is a vector which defines the intervals; the j-th interval is [intv(j), intv(j+1))
576 : pp is a matrix of Chebyshev coefficients which defines a piecewise polynomial P(z)
577 : in a basis of the `translated' Chebyshev polynomials in each interval
578 : the polynomial P_j(z) in the j-th interval, i.e. when z is in [intv(j), intv(j+1)), is defined by the j-th column of pp:
579 : if basisTranslated == false, then
580 : P_j(z) = pp(1,j)*T_0(z) + pp(2,j)*T_1(z) + ... + pp(n,j)*T_{n-1}(z),
581 : where T_i(z) is the Chebyshev polynomial of the first kind,
582 : T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
583 : if basisTranslated == true, then
584 : P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
585 : where S_i is the `translated' Chebyshev polynomial S_i((z-c)/h) = T_i(z), with
586 : c = (intv(j)+intv(j+1)) / 2, h = (intv(j+1)-intv(j)) / 2
587 :
588 : output:
589 : the evaluated value of P(z) at z=z0
590 :
591 : note that if z0 falls below the first interval, then the polynomial in the first interval will be used to evaluate P(z0)
592 : if z0 flies over the last interval, then the polynomial in the last interval will be used to evaluate P(z0)
593 : */
594 225254 : static inline PetscReal FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(PetscReal *pp,PetscInt m,PetscReal *intv,PetscInt npoints,PetscReal z0)
595 : {
596 225254 : PetscReal *sintv,aa,bb,resul;
597 225254 : PetscInt idx;
598 :
599 225254 : PetscFunctionBegin;
600 : /* determine the interval which contains z0 */
601 225254 : sintv = &intv[1];
602 225254 : idx = 1;
603 225254 : if (npoints>2 && z0 >= *sintv) {
604 104400 : sintv++;
605 405724 : while (++idx < npoints-1) {
606 307322 : if (z0 < *sintv) break;
607 301324 : sintv++;
608 : }
609 : }
610 : /* idx==1 if npoints<=2; otherwise idx satisfies:
611 : intv(idx) <= z0 < intv(idx+1), if 2 <= idx <= npoints-2
612 : z0 < intv(idx+1), if idx == 1
613 : intv(idx) <= z0, if idx == npoints-1
614 : in addition, sintv points to &intv(idx+1) */
615 225254 : if (basisTranslated) {
616 : /* the basis consists of `translated' Chebyshev polynomials */
617 : /* find the interval of concern, [aa,bb] */
618 225254 : aa = *(sintv-1);
619 225254 : bb = *sintv;
620 225254 : resul = FILTLAN_PolynomialEvaluationInChebyshevBasis(pp,m,idx,z0,aa,bb);
621 : } else {
622 : /* the basis consists of standard Chebyshev polynomials, with interval [-1.0,1.0] for integration */
623 : resul = FILTLAN_PolynomialEvaluationInChebyshevBasis(pp,m,idx,z0,-1.0,1.0);
624 : }
625 225254 : PetscFunctionReturn(resul);
626 : }
627 :
628 : /*
629 : FILTLAN function PiecewisePolynomialInnerProductInChebyshevBasis
630 :
631 : compute the weighted inner product of two piecewise polynomials expanded
632 : in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials for each interval
633 :
634 : pp and qq are two matrices of Chebyshev coefficients which define the piecewise polynomials P(z) and Q(z), respectively
635 : for z in the j-th interval, P(z) equals
636 : P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
637 : and Q(z) equals
638 : Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(n,j)*S_{n-1}(z),
639 : where S_i(z) is the `translated' Chebyshev polynomial in that interval,
640 : S_i((z-c)/h) = T_i(z), c = (aa+bb)) / 2, h = (bb-aa) / 2,
641 : with T_i(z) the Chebyshev polynomial of the first kind
642 : T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
643 :
644 : the (scaled) j-th interval inner product is defined by
645 : <P_j,Q_j> = (Pi/2)*(pp(1,j)*qq(1,j) + sum_{k} pp(k,j)*qq(k,j)),
646 : which comes from the property
647 : <T_0,T_0>=pi, <T_i,T_i>=pi/2 for i>=1, and <T_i,T_j>=0 for i!=j
648 :
649 : the weighted inner product is <P,Q> = sum_{j} intervalWeights(j)*<P_j,Q_j>,
650 : which is the return value
651 :
652 : note that for unit weights, pass an empty vector of intervalWeights (i.e. of length 0)
653 : */
654 603667 : static inline PetscReal FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(PetscReal *pp,PetscInt prows,PetscInt pcols,PetscInt ldp,PetscReal *qq,PetscInt qrows,PetscInt qcols,PetscInt ldq,const PetscReal *intervalWeights)
655 : {
656 603667 : PetscInt nintv,deg1,i,k;
657 603667 : PetscReal *sp,*sq,ans=0.0,ans2;
658 :
659 603667 : PetscFunctionBegin;
660 603667 : deg1 = PetscMin(prows,qrows); /* number of effective coefficients, one more than the effective polynomial degree */
661 603667 : if (PetscUnlikely(!deg1)) PetscFunctionReturn(0.0);
662 603667 : nintv = PetscMin(pcols,qcols); /* number of intervals */
663 :
664 : /* scaled by intervalWeights(i) in the i-th interval (we assume intervalWeights[] are always provided).
665 : compute ans = sum_{i=1,...,nintv} intervalWeights(i)*[ pp(1,i)*qq(1,i) + sum_{k=1,...,deg} pp(k,i)*qq(k,i) ] */
666 3622002 : for (i=0;i<nintv;i++) { /* compute ans2 = pp(1,i)*qq(1,i) + sum_{k=1,...,deg} pp(k,i)*qq(k,i) */
667 3018335 : sp = pp+i*ldp;
668 3018335 : sq = qq+i*ldq;
669 3018335 : ans2 = (*sp) * (*sq); /* the first term pp(1,i)*qq(1,i) is being added twice */
670 171892105 : for (k=0;k<deg1;k++) ans2 += (*sp++)*(*sq++); /* add pp(k,i)*qq(k,i) */
671 3018335 : ans += ans2*intervalWeights[i]; /* compute ans += ans2*intervalWeights(i) */
672 : }
673 603667 : PetscFunctionReturn(ans*PETSC_PI/2.0);
674 : }
675 :
676 : /*
677 : FILTLAN function PiecewisePolynomialInChebyshevBasisMultiplyX
678 :
679 : compute Q(z) = z*P(z), where P(z) and Q(z) are piecewise polynomials expanded
680 : in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials for each interval
681 :
682 : P(z) and Q(z) are stored as matrices of Chebyshev coefficients pp and qq, respectively
683 :
684 : For z in the j-th interval, P(z) equals
685 : P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
686 : and Q(z) equals
687 : Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(n,j)*S_{n-1}(z),
688 : where S_i(z) is the `translated' Chebyshev polynomial in that interval,
689 : S_i((z-c)/h) = T_i(z), c = (intv(j)+intv(j+1))) / 2, h = (intv(j+1)-intv(j)) / 2,
690 : with T_i(z) the Chebyshev polynomial of the first kind
691 : T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
692 :
693 : the returned matrix is qq which represents Q(z) = z*P(z)
694 : */
695 201667 : static inline PetscErrorCode FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(PetscReal *pp,PetscInt deg1,PetscInt ldp,PetscReal *intv,PetscInt nintv,PetscReal *qq,PetscInt ldq)
696 : {
697 201667 : PetscInt i,j;
698 201667 : PetscReal c,h,h2,tmp,*sp,*sq,*sp2,*sq2;
699 :
700 201667 : PetscFunctionBegin;
701 1210002 : for (j=0;j<nintv;j++) { /* consider interval between intv(j) and intv(j+1) */
702 1008335 : sp = pp+j*ldp;
703 1008335 : sq = qq+j*ldq;
704 1008335 : sp2 = sp;
705 1008335 : sq2 = sq+1;
706 1008335 : c = 0.5*(intv[j] + intv[j+1]); /* compute c = (intv(j) + intv(j+1))/2 */
707 1008335 : h = 0.5*(intv[j+1] - intv[j]); /* compute h = (intv(j+1) - intv(j))/2 and h2 = h/2 */
708 1008335 : h2 = 0.5*h;
709 1008335 : i = deg1;
710 74837005 : while (i--) *sq++ = c*(*sp++); /* compute q(1:deg1,j) = c*p(1:deg1,j) */
711 1008335 : *sq++ = 0.0; /* set q(deg1+1,j) = 0.0 */
712 1008335 : *(sq2++) += h*(*sp2++); /* compute q(2,j) = q(2,j) + h*p(1,j) */
713 1008335 : i = deg1-1;
714 73828670 : while (i--) { /* compute q(3:deg1+1,j) += h2*p(2:deg1,j) and then q(1:deg1-1,j) += h2*p(2:deg1,j) */
715 72820335 : tmp = h2*(*sp2++);
716 72820335 : *(sq2-2) += tmp;
717 72820335 : *(sq2++) += tmp;
718 : }
719 : }
720 201667 : PetscFunctionReturn(PETSC_SUCCESS);
721 : }
722 :
723 : /* ////////////////////////////////////////////////////////////////////////////
724 : // Conjugate Residual Method in the Polynomial Space
725 : //////////////////////////////////////////////////////////////////////////// */
726 :
727 : /*
728 : B := alpha*A + beta*B
729 :
730 : A,B are nxk
731 : */
732 802250 : static inline PetscErrorCode Mat_AXPY_BLAS(PetscInt n,PetscInt k,PetscReal alpha,const PetscReal *A,PetscInt lda,PetscReal beta,PetscReal *B,PetscInt ldb)
733 : {
734 802250 : PetscInt i,j;
735 :
736 802250 : PetscFunctionBegin;
737 802250 : if (beta==(PetscReal)1.0) {
738 151212000 : for (j=0;j<k;j++) for (i=0;i<n;i++) B[i+j*ldb] += alpha*A[i+j*lda];
739 402000 : PetscCall(PetscLogFlops(2.0*n*k));
740 : } else {
741 151028625 : for (j=0;j<k;j++) for (i=0;i<n;i++) B[i+j*ldb] = alpha*A[i+j*lda] + beta*B[i+j*ldb];
742 400250 : PetscCall(PetscLogFlops(3.0*n*k));
743 : }
744 802250 : PetscFunctionReturn(PETSC_SUCCESS);
745 : }
746 :
747 : /*
748 : FILTLAN function FilteredConjugateResidualPolynomial
749 :
750 : ** Conjugate Residual Method in the Polynomial Space
751 :
752 : this routine employs a conjugate-residual-type algorithm in polynomial space to minimize ||P(z)-Q(z)||_w,
753 : where P(z), the base filter, is the input piecewise polynomial, and
754 : Q(z) is the output polynomial satisfying Q(0)==1, i.e. the constant term of Q(z) is 1
755 : niter is the number of conjugate-residual iterations; therefore, the degree of Q(z) is up to niter+1
756 : both P(z) and Q(z) are expanded in the `translated' (scale-and-shift) Chebyshev basis for each interval,
757 : and presented as matrices of Chebyshev coefficients, denoted by pp and qq, respectively
758 :
759 : input:
760 : intv is a vector which defines the intervals; the j-th interval is [intv(j),intv(j+1))
761 : w is a vector of Chebyshev weights; the weight of j-th interval is w(j)
762 : the interval weights define the inner product of two continuous functions and then
763 : the derived w-norm ||P(z)-Q(z)||_w
764 : pp is a matrix of Chebyshev coefficients which defines the piecewise polynomial P(z)
765 : to be specific, for z in [intv(j), intv(j+1)), P(z) equals
766 : P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(niter+2,j)*S_{niter+1}(z),
767 : where S_i(z) is the `translated' Chebyshev polynomial in that interval,
768 : S_i((z-c)/h) = T_i(z), c = (intv(j)+intv(j+1))) / 2, h = (intv(j+1)-intv(j)) / 2,
769 : with T_i(z) the Chebyshev polynomial of the first kind,
770 : T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
771 :
772 : output:
773 : the return matrix, denoted by qq, represents a polynomial Q(z) with degree up to 1+niter
774 : and satisfying Q(0)==1, such that ||P(z))-Q(z)||_w is minimized
775 : this polynomial Q(z) is expanded in the `translated' Chebyshev basis for each interval
776 : to be precise, considering z in [intv(j), intv(j+1)), Q(z) equals
777 : Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(niter+2,j)*S_{niter+1}(z)
778 :
779 : note:
780 : 1. since Q(0)==1, P(0)==1 is expected; if P(0)!=1, one can translate P(z)
781 : for example, if P(0)==0, one can use 1-P(z) as input instead of P(z)
782 : 2. typically, the base filter, defined by pp and intv, is from Hermite interpolation
783 : in intervals [intv(j),intv(j+1)) for j=1,...,nintv, with nintv the number of intervals
784 : */
785 875 : static PetscErrorCode FILTLAN_FilteredConjugateResidualPolynomial(PetscReal *cpol,PetscReal *baseFilter,PetscInt nbase,PetscReal *intv,PetscInt m,PetscReal *intervalWeights,PetscInt niter)
786 : {
787 875 : PetscInt i,j,srpol,scpol,sarpol,sppol,sappol,ld,nintv;
788 875 : PetscReal rho,rho0,rho1,den,bet,alp,alp0,*ppol,*rpol,*appol,*arpol;
789 :
790 875 : PetscFunctionBegin;
791 875 : nintv = m-1;
792 875 : ld = niter+2; /* leading dimension */
793 875 : PetscCall(PetscCalloc4(ld*nintv,&ppol,ld*nintv,&rpol,ld*nintv,&appol,ld*nintv,&arpol));
794 875 : PetscCall(PetscArrayzero(cpol,ld*nintv));
795 : /* initialize polynomial ppol to be 1 (i.e. multiplicative identity) in all intervals */
796 5250 : sppol = 2;
797 5250 : srpol = 2;
798 5250 : scpol = 2;
799 5250 : for (j=0;j<nintv;j++) {
800 4375 : ppol[j*ld] = 1.0;
801 4375 : rpol[j*ld] = 1.0;
802 4375 : cpol[j*ld] = 1.0;
803 : }
804 : /* ppol is the initial p-polynomial (corresponding to the A-conjugate vector p in CG)
805 : rpol is the r-polynomial (corresponding to the residual vector r in CG)
806 : cpol is the "corrected" residual polynomial (result of this function) */
807 875 : sappol = 3;
808 875 : sarpol = 3;
809 875 : PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(ppol,sppol,ld,intv,nintv,appol,ld));
810 16625 : for (i=0;i<3;i++) for (j=0;j<nintv;j++) arpol[i+j*ld] = appol[i+j*ld];
811 875 : rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
812 116475 : for (i=0;i<niter;i++) {
813 115600 : den = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(appol,sappol,nintv,ld,appol,sappol,nintv,ld,intervalWeights);
814 115600 : alp0 = rho/den;
815 115600 : rho1 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(baseFilter,nbase,nintv,nbase,appol,sappol,nintv,ld,intervalWeights);
816 115600 : alp = (rho-rho1)/den;
817 115600 : srpol++;
818 115600 : scpol++;
819 115600 : PetscCall(Mat_AXPY_BLAS(srpol,nintv,-alp0,appol,ld,1.0,rpol,ld));
820 115600 : PetscCall(Mat_AXPY_BLAS(scpol,nintv,-alp,appol,ld,1.0,cpol,ld));
821 115600 : if (i+1 == niter) break;
822 114725 : sarpol++;
823 114725 : PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(rpol,srpol,ld,intv,nintv,arpol,ld));
824 114725 : rho0 = rho;
825 114725 : rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
826 114725 : bet = rho / rho0;
827 114725 : sppol++;
828 114725 : sappol++;
829 114725 : PetscCall(Mat_AXPY_BLAS(sppol,nintv,1.0,rpol,ld,bet,ppol,ld));
830 114725 : PetscCall(Mat_AXPY_BLAS(sappol,nintv,1.0,arpol,ld,bet,appol,ld));
831 : }
832 875 : PetscCall(PetscFree4(ppol,rpol,appol,arpol));
833 875 : PetscFunctionReturn(PETSC_SUCCESS);
834 : }
835 :
836 : /*
837 : FILTLAN function FilteredConjugateResidualMatrixPolynomialVectorProduct
838 :
839 : this routine employs a conjugate-residual-type algorithm in polynomial space to compute
840 : x = x0 + s(A)*r0 with r0 = b - A*x0, such that ||(1-P(z))-z*s(z)||_w is minimized, where
841 : P(z) is a given piecewise polynomial, called the base filter,
842 : s(z) is a polynomial of degree up to niter, the number of conjugate-residual iterations,
843 : and b and x0 are given vectors
844 :
845 : note that P(z) is expanded in the `translated' (scale-and-shift) Chebyshev basis for each interval,
846 : and presented as a matrix of Chebyshev coefficients pp
847 :
848 : input:
849 : A is a sparse matrix
850 : x0, b are vectors
851 : niter is the number of conjugate-residual iterations
852 : intv is a vector which defines the intervals; the j-th interval is [intv(j),intv(j+1))
853 : w is a vector of Chebyshev weights; the weight of j-th interval is w(j)
854 : the interval weights define the inner product of two continuous functions and then
855 : the derived w-norm ||P(z)-Q(z)||_w
856 : pp is a matrix of Chebyshev coefficients which defines the piecewise polynomial P(z)
857 : to be specific, for z in [intv(j), intv(j+1)), P(z) equals
858 : P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(niter+2,j)*S_{niter+1}(z),
859 : where S_i(z) is the `translated' Chebyshev polynomial in that interval,
860 : S_i((z-c)/h) = T_i(z), c = (intv(j)+intv(j+1))) / 2, h = (intv(j+1)-intv(j)) / 2,
861 : with T_i(z) the Chebyshev polynomial of the first kind,
862 : T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
863 : tol is the tolerance; if the residual polynomial in z-norm is dropped by a factor lower
864 : than tol, then stop the conjugate-residual iteration
865 :
866 : output:
867 : the return vector is x = x0 + s(A)*r0 with r0 = b - A*x0, such that ||(1-P(z))-z*s(z)||_w is minimized,
868 : subject to that s(z) is a polynomial of degree up to niter, where P(z) is the base filter
869 : in short, z*s(z) approximates 1-P(z)
870 :
871 : note:
872 : 1. since z*s(z) approximates 1-P(z), P(0)==1 is expected; if P(0)!=1, one can translate P(z)
873 : for example, if P(0)==0, one can use 1-P(z) as input instead of P(z)
874 : 2. typically, the base filter, defined by pp and intv, is from Hermite interpolation
875 : in intervals [intv(j),intv(j+1)) for j=1,...,nintv, with nintv the number of intervals
876 : 3. a common application is to compute R(A)*b, where R(z) approximates 1-P(z)
877 : in this case, one can set x0 = 0 and then the return vector is x = s(A)*b, where
878 : z*s(z) approximates 1-P(z); therefore, A*x is the wanted R(A)*b
879 : */
880 642 : static PetscErrorCode FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProduct(Mat A,Vec b,Vec x,PetscReal *baseFilter,PetscInt nbase,PetscReal *intv,PetscInt nintv,PetscReal *intervalWeights,PetscInt niter,Vec *work)
881 : {
882 642 : PetscInt i,j,srpol,scpol,sarpol,sppol,sappol,ld;
883 642 : PetscReal rho,rho0,rho00,rho1,den,bet,alp,alp0,*cpol,*ppol,*rpol,*appol,*arpol,tol=0.0;
884 642 : Vec r=work[0],p=work[1],ap=work[2],w=work[3];
885 642 : PetscScalar alpha;
886 :
887 642 : PetscFunctionBegin;
888 642 : ld = niter+3; /* leading dimension */
889 642 : PetscCall(PetscCalloc5(ld*nintv,&ppol,ld*nintv,&rpol,ld*nintv,&cpol,ld*nintv,&appol,ld*nintv,&arpol));
890 : /* initialize polynomial ppol to be 1 (i.e. multiplicative identity) in all intervals */
891 3852 : sppol = 2;
892 3852 : srpol = 2;
893 3852 : scpol = 2;
894 3852 : for (j=0;j<nintv;j++) {
895 3210 : ppol[j*ld] = 1.0;
896 3210 : rpol[j*ld] = 1.0;
897 3210 : cpol[j*ld] = 1.0;
898 : }
899 : /* ppol is the initial p-polynomial (corresponding to the A-conjugate vector p in CG)
900 : rpol is the r-polynomial (corresponding to the residual vector r in CG)
901 : cpol is the "corrected" residual polynomial */
902 642 : sappol = 3;
903 642 : sarpol = 3;
904 642 : PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(ppol,sppol,ld,intv,nintv,appol,ld));
905 12198 : for (i=0;i<3;i++) for (j=0;j<nintv;j++) arpol[i+j*ld] = appol[i+j*ld];
906 642 : rho00 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
907 642 : rho = rho00;
908 :
909 : /* corrected CR in vector space */
910 : /* we assume x0 is always 0 */
911 642 : PetscCall(VecSet(x,0.0));
912 642 : PetscCall(VecCopy(b,r)); /* initial residual r = b-A*x0 */
913 642 : PetscCall(VecCopy(r,p)); /* p = r */
914 642 : PetscCall(MatMult(A,p,ap)); /* ap = A*p */
915 :
916 81042 : for (i=0;i<niter;i++) {
917 : /* iteration in the polynomial space */
918 80400 : den = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(appol,sappol,nintv,ld,appol,sappol,nintv,ld,intervalWeights);
919 80400 : alp0 = rho/den;
920 80400 : rho1 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(baseFilter,nbase,nintv,nbase,appol,sappol,nintv,ld,intervalWeights);
921 80400 : alp = (rho-rho1)/den;
922 80400 : srpol++;
923 80400 : scpol++;
924 80400 : PetscCall(Mat_AXPY_BLAS(srpol,nintv,-alp0,appol,ld,1.0,rpol,ld));
925 80400 : PetscCall(Mat_AXPY_BLAS(scpol,nintv,-alp,appol,ld,1.0,cpol,ld));
926 80400 : sarpol++;
927 80400 : PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(rpol,srpol,ld,intv,nintv,arpol,ld));
928 80400 : rho0 = rho;
929 80400 : rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
930 :
931 : /* update x in the vector space */
932 80400 : alpha = alp;
933 80400 : PetscCall(VecAXPY(x,alpha,p)); /* x += alp*p */
934 80400 : if (rho < tol*rho00) break;
935 :
936 : /* finish the iteration in the polynomial space */
937 80400 : bet = rho / rho0;
938 80400 : sppol++;
939 80400 : sappol++;
940 80400 : PetscCall(Mat_AXPY_BLAS(sppol,nintv,1.0,rpol,ld,bet,ppol,ld));
941 80400 : PetscCall(Mat_AXPY_BLAS(sappol,nintv,1.0,arpol,ld,bet,appol,ld));
942 :
943 : /* finish the iteration in the vector space */
944 80400 : alpha = -alp0;
945 80400 : PetscCall(VecAXPY(r,alpha,ap)); /* r -= alp0*ap */
946 80400 : alpha = bet;
947 80400 : PetscCall(VecAYPX(p,alpha,r)); /* p = r + bet*p */
948 80400 : PetscCall(MatMult(A,r,w)); /* ap = A*r + bet*ap */
949 80400 : PetscCall(VecAYPX(ap,alpha,w));
950 : }
951 642 : PetscCall(PetscFree5(ppol,rpol,cpol,appol,arpol));
952 642 : PetscFunctionReturn(PETSC_SUCCESS);
953 : }
954 :
955 : /*
956 : Gateway to FILTLAN for evaluating y=p(A)*x
957 : */
958 642 : static PetscErrorCode MatMult_FILTLAN(Mat A,Vec x,Vec y)
959 : {
960 642 : ST st;
961 642 : ST_FILTER *ctx;
962 642 : PetscInt npoints;
963 :
964 642 : PetscFunctionBegin;
965 642 : PetscCall(MatShellGetContext(A,&st));
966 642 : ctx = (ST_FILTER*)st->data;
967 642 : npoints = (ctx->filterInfo->filterType == 2)? 6: 4;
968 642 : PetscCall(FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProduct(ctx->T,x,y,ctx->baseFilter,2*ctx->baseDegree+2,ctx->intervals,npoints-1,ctx->opts->intervalWeights,ctx->polyDegree,st->work));
969 642 : PetscCall(VecCopy(y,st->work[0]));
970 642 : PetscCall(MatMult(ctx->T,st->work[0],y));
971 642 : PetscFunctionReturn(PETSC_SUCCESS);
972 : }
973 :
974 : /* Block version of FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProduct */
975 25 : static PetscErrorCode FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProductBlock(Mat A,Mat B,Mat C,PetscReal *baseFilter,PetscInt nbase,PetscReal *intv,PetscInt nintv,PetscReal *intervalWeights,PetscInt niter,Vec *work,Mat R,Mat P,Mat AP,Mat W)
976 : {
977 25 : PetscInt i,j,srpol,scpol,sarpol,sppol,sappol,ld;
978 25 : PetscReal rho,rho0,rho00,rho1,den,bet,alp,alp0,*cpol,*ppol,*rpol,*appol,*arpol,tol=0.0;
979 25 : PetscScalar alpha;
980 :
981 25 : PetscFunctionBegin;
982 25 : ld = niter+3; /* leading dimension */
983 25 : PetscCall(PetscCalloc5(ld*nintv,&ppol,ld*nintv,&rpol,ld*nintv,&cpol,ld*nintv,&appol,ld*nintv,&arpol));
984 : /* initialize polynomial ppol to be 1 (i.e. multiplicative identity) in all intervals */
985 150 : sppol = 2;
986 150 : srpol = 2;
987 150 : scpol = 2;
988 150 : for (j=0;j<nintv;j++) {
989 125 : ppol[j*ld] = 1.0;
990 125 : rpol[j*ld] = 1.0;
991 125 : cpol[j*ld] = 1.0;
992 : }
993 : /* ppol is the initial p-polynomial (corresponding to the A-conjugate vector p in CG)
994 : rpol is the r-polynomial (corresponding to the residual vector r in CG)
995 : cpol is the "corrected" residual polynomial */
996 25 : sappol = 3;
997 25 : sarpol = 3;
998 25 : PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(ppol,sppol,ld,intv,nintv,appol,ld));
999 475 : for (i=0;i<3;i++) for (j=0;j<nintv;j++) arpol[i+j*ld] = appol[i+j*ld];
1000 25 : rho00 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
1001 25 : rho = rho00;
1002 :
1003 : /* corrected CR in vector space */
1004 : /* we assume x0 is always 0 */
1005 25 : PetscCall(MatZeroEntries(C));
1006 25 : PetscCall(MatCopy(B,R,SAME_NONZERO_PATTERN)); /* initial residual r = b-A*x0 */
1007 25 : PetscCall(MatCopy(R,P,SAME_NONZERO_PATTERN)); /* p = r */
1008 25 : PetscCall(MatMatMult(A,P,MAT_REUSE_MATRIX,PETSC_DEFAULT,&AP)); /* ap = A*p */
1009 :
1010 5025 : for (i=0;i<niter;i++) {
1011 : /* iteration in the polynomial space */
1012 5000 : den = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(appol,sappol,nintv,ld,appol,sappol,nintv,ld,intervalWeights);
1013 5000 : alp0 = rho/den;
1014 5000 : rho1 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(baseFilter,nbase,nintv,nbase,appol,sappol,nintv,ld,intervalWeights);
1015 5000 : alp = (rho-rho1)/den;
1016 5000 : srpol++;
1017 5000 : scpol++;
1018 5000 : PetscCall(Mat_AXPY_BLAS(srpol,nintv,-alp0,appol,ld,1.0,rpol,ld));
1019 5000 : PetscCall(Mat_AXPY_BLAS(scpol,nintv,-alp,appol,ld,1.0,cpol,ld));
1020 5000 : sarpol++;
1021 5000 : PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(rpol,srpol,ld,intv,nintv,arpol,ld));
1022 5000 : rho0 = rho;
1023 5000 : rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
1024 :
1025 : /* update x in the vector space */
1026 5000 : alpha = alp;
1027 5000 : PetscCall(MatAXPY(C,alpha,P,SAME_NONZERO_PATTERN)); /* x += alp*p */
1028 5000 : if (rho < tol*rho00) break;
1029 :
1030 : /* finish the iteration in the polynomial space */
1031 5000 : bet = rho / rho0;
1032 5000 : sppol++;
1033 5000 : sappol++;
1034 5000 : PetscCall(Mat_AXPY_BLAS(sppol,nintv,1.0,rpol,ld,bet,ppol,ld));
1035 5000 : PetscCall(Mat_AXPY_BLAS(sappol,nintv,1.0,arpol,ld,bet,appol,ld));
1036 :
1037 : /* finish the iteration in the vector space */
1038 5000 : alpha = -alp0;
1039 5000 : PetscCall(MatAXPY(R,alpha,AP,SAME_NONZERO_PATTERN)); /* r -= alp0*ap */
1040 5000 : alpha = bet;
1041 5000 : PetscCall(MatAYPX(P,alpha,R,SAME_NONZERO_PATTERN)); /* p = r + bet*p */
1042 5000 : PetscCall(MatMatMult(A,R,MAT_REUSE_MATRIX,PETSC_DEFAULT,&W)); /* ap = A*r + bet*ap */
1043 5000 : PetscCall(MatAYPX(AP,alpha,W,SAME_NONZERO_PATTERN));
1044 : }
1045 25 : PetscCall(PetscFree5(ppol,rpol,cpol,appol,arpol));
1046 25 : PetscFunctionReturn(PETSC_SUCCESS);
1047 : }
1048 :
1049 : /*
1050 : Gateway to FILTLAN for evaluating C=p(A)*B
1051 : */
1052 25 : static PetscErrorCode MatMatMult_FILTLAN(Mat A,Mat B,Mat C,void *pctx)
1053 : {
1054 25 : ST st;
1055 25 : ST_FILTER *ctx;
1056 25 : PetscInt i,m1,m2,npoints;
1057 :
1058 25 : PetscFunctionBegin;
1059 25 : PetscCall(MatShellGetContext(A,&st));
1060 25 : ctx = (ST_FILTER*)st->data;
1061 25 : npoints = (ctx->filterInfo->filterType == 2)? 6: 4;
1062 25 : if (ctx->nW) { /* check if work matrices must be resized */
1063 24 : PetscCall(MatGetSize(B,NULL,&m1));
1064 24 : PetscCall(MatGetSize(ctx->W[0],NULL,&m2));
1065 24 : if (m1!=m2) {
1066 3 : PetscCall(MatDestroyMatrices(ctx->nW,&ctx->W));
1067 3 : ctx->nW = 0;
1068 : }
1069 : }
1070 25 : if (!ctx->nW) { /* allocate work matrices */
1071 4 : ctx->nW = 4;
1072 4 : PetscCall(PetscMalloc1(ctx->nW,&ctx->W));
1073 20 : for (i=0;i<ctx->nW;i++) PetscCall(MatDuplicate(B,MAT_DO_NOT_COPY_VALUES,&ctx->W[i]));
1074 : }
1075 25 : PetscCall(FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProductBlock(ctx->T,B,ctx->W[0],ctx->baseFilter,2*ctx->baseDegree+2,ctx->intervals,npoints-1,ctx->opts->intervalWeights,ctx->polyDegree,st->work,C,ctx->W[1],ctx->W[2],ctx->W[3]));
1076 25 : PetscCall(MatMatMult(ctx->T,ctx->W[0],MAT_REUSE_MATRIX,PETSC_DEFAULT,&C));
1077 25 : PetscFunctionReturn(PETSC_SUCCESS);
1078 : }
1079 :
1080 : /*
1081 : FILTLAN function PolynomialFilterInterface::setFilter
1082 :
1083 : Creates the shifted (and scaled) matrix and the base filter P(z).
1084 : M is a shell matrix whose MatMult() applies the filter.
1085 : */
1086 8 : PetscErrorCode STFilter_FILTLAN_setFilter(ST st,Mat *G)
1087 : {
1088 8 : ST_FILTER *ctx = (ST_FILTER*)st->data;
1089 8 : PetscInt i,npoints,n,m,N,M;
1090 8 : PetscReal frame2[4];
1091 8 : PetscScalar alpha;
1092 8 : const PetscInt HighLowFlags[5] = { 1, -1, 0, -1, 1 };
1093 :
1094 8 : PetscFunctionBegin;
1095 8 : if (ctx->frame[0] == ctx->frame[1]) { /* low pass filter, convert it to high pass filter */
1096 : /* T = frame[3]*eye(n) - A */
1097 0 : PetscCall(MatDestroy(&ctx->T));
1098 0 : PetscCall(MatDuplicate(st->A[0],MAT_COPY_VALUES,&ctx->T));
1099 0 : PetscCall(MatScale(ctx->T,-1.0));
1100 0 : alpha = ctx->frame[3];
1101 0 : PetscCall(MatShift(ctx->T,alpha));
1102 0 : for (i=0;i<4;i++) frame2[i] = ctx->frame[3] - ctx->frame[3-i];
1103 : } else { /* it can be a mid-pass filter or a high-pass filter */
1104 8 : if (ctx->frame[0] == 0.0) {
1105 0 : PetscCall(PetscObjectReference((PetscObject)st->A[0]));
1106 0 : PetscCall(MatDestroy(&ctx->T));
1107 0 : ctx->T = st->A[0];
1108 0 : for (i=0;i<4;i++) frame2[i] = ctx->frame[i];
1109 : } else {
1110 : /* T = A - frame[0]*eye(n) */
1111 8 : PetscCall(MatDestroy(&ctx->T));
1112 8 : PetscCall(MatDuplicate(st->A[0],MAT_COPY_VALUES,&ctx->T));
1113 8 : alpha = -ctx->frame[0];
1114 8 : PetscCall(MatShift(ctx->T,alpha));
1115 40 : for (i=0;i<4;i++) frame2[i] = ctx->frame[i] - ctx->frame[0];
1116 : }
1117 : }
1118 :
1119 : /* no need to recompute filter if the parameters did not change */
1120 8 : if (st->state==ST_STATE_INITIAL || ctx->filtch) {
1121 7 : PetscCall(FILTLAN_GetIntervals(ctx->intervals,frame2,ctx->polyDegree,ctx->baseDegree,ctx->opts,ctx->filterInfo));
1122 : /* translate the intervals back */
1123 7 : if (ctx->frame[0] == ctx->frame[1]) { /* low pass filter, convert it to high pass filter */
1124 0 : for (i=0;i<4;i++) ctx->intervals2[i] = ctx->frame[3] - ctx->intervals[3-i];
1125 : } else { /* it can be a mid-pass filter or a high-pass filter */
1126 7 : if (ctx->frame[0] == 0.0) {
1127 0 : for (i=0;i<6;i++) ctx->intervals2[i] = ctx->intervals[i];
1128 : } else {
1129 49 : for (i=0;i<6;i++) ctx->intervals2[i] = ctx->intervals[i] + ctx->frame[0];
1130 : }
1131 : }
1132 :
1133 7 : npoints = (ctx->filterInfo->filterType == 2)? 6: 4;
1134 7 : PetscCall(PetscFree(ctx->baseFilter));
1135 7 : PetscCall(PetscMalloc1((2*ctx->baseDegree+2)*(npoints-1),&ctx->baseFilter));
1136 7 : PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(ctx->baseFilter,ctx->intervals,npoints,HighLowFlags,ctx->baseDegree));
1137 7 : PetscCall(PetscInfo(st,"Computed value of yLimit = %g\n",(double)ctx->filterInfo->yLimit));
1138 : }
1139 8 : ctx->filtch = PETSC_FALSE;
1140 :
1141 : /* create shell matrix*/
1142 8 : if (!*G) {
1143 7 : PetscCall(MatGetSize(ctx->T,&N,&M));
1144 7 : PetscCall(MatGetLocalSize(ctx->T,&n,&m));
1145 7 : PetscCall(MatCreateShell(PetscObjectComm((PetscObject)st),n,m,N,M,st,G));
1146 7 : PetscCall(MatShellSetOperation(*G,MATOP_MULT,(void(*)(void))MatMult_FILTLAN));
1147 7 : PetscCall(MatShellSetMatProductOperation(*G,MATPRODUCT_AB,NULL,MatMatMult_FILTLAN,NULL,MATDENSE,MATDENSE));
1148 7 : PetscCall(MatShellSetMatProductOperation(*G,MATPRODUCT_AB,NULL,MatMatMult_FILTLAN,NULL,MATDENSECUDA,MATDENSECUDA));
1149 7 : PetscCall(MatShellSetMatProductOperation(*G,MATPRODUCT_AB,NULL,MatMatMult_FILTLAN,NULL,MATDENSEHIP,MATDENSEHIP));
1150 : }
1151 8 : PetscFunctionReturn(PETSC_SUCCESS);
1152 : }
|