GCC Code Coverage Report


Directory: ./
File: src/sys/classes/st/impls/filter/filtlan.c
Date: 2025-10-03 04:28:47
Exec Total Coverage
Lines: 585 609 96.1%
Functions: 18 18 100.0%
Branches: 716 1158 61.8%

Line Branch Exec Source
1 /*
2 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3 SLEPc - Scalable Library for Eigenvalue Problem Computations
4 Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
5
6 This file is part of SLEPc.
7 SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9 */
10 /*
11 This file is an adaptation of several subroutines from FILTLAN, the
12 Filtered Lanczos Package, authored by Haw-ren Fang and Yousef Saad.
13
14 More information at:
15 https://www-users.cs.umn.edu/~saad/software/filtlan
16
17 References:
18
19 [1] H. Fang and Y. Saad, "A filtered Lanczos procedure for extreme and interior
20 eigenvalue problems", SIAM J. Sci. Comput. 34(4):A2220-A2246, 2012.
21 */
22
23 #include <slepc/private/stimpl.h>
24 #include "filter.h"
25
26 static PetscErrorCode FILTLAN_FilteredConjugateResidualPolynomial(PetscReal*,PetscReal*,PetscInt,PetscReal*,PetscInt,PetscReal*,PetscInt);
27 static PetscReal FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(PetscReal*,PetscInt,PetscReal*,PetscInt,PetscReal);
28 static PetscErrorCode FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(PetscInt,PetscReal,PetscReal,PetscReal*,PetscReal*,PetscReal*,PetscReal*);
29
30 /* ////////////////////////////////////////////////////////////////////////////
31 // Newton - Hermite Polynomial Interpolation
32 //////////////////////////////////////////////////////////////////////////// */
33
34 /*
35 FILTLAN function NewtonPolynomial
36
37 build P(z) by Newton's divided differences in the form
38 P(z) = a(1) + a(2)*(z-x(1)) + a(3)*(z-x(1))*(z-x(2)) + ... + a(n)*(z-x(1))*...*(z-x(n-1)),
39 such that P(x(i)) = y(i) for i=1,...,n, where
40 x,y are input vectors of length n, and a is the output vector of length n
41 if x(i)==x(j) for some i!=j, then it is assumed that the derivative of P(z) is to be zero at x(i),
42 and the Hermite polynomial interpolation is applied
43 in general, if there are k x(i)'s with the same value x0, then
44 the j-th order derivative of P(z) is zero at z=x0 for j=1,...,k-1
45 */
46 19496 static inline PetscErrorCode FILTLAN_NewtonPolynomial(PetscInt n,PetscReal *x,PetscReal *y,PetscReal *sa,PetscReal *sf)
47 {
48 19496 PetscReal d,*sx=x,*sy=y;
49 19496 PetscInt j,k;
50
51
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
19496 PetscFunctionBegin;
52
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
19496 PetscCall(PetscArraycpy(sf,sy,n));
53
54 /* apply Newton's finite difference method */
55 19496 sa[0] = sf[0];
56
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
428912 for (j=1;j<n;j++) {
57
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
4912992 for (k=n-1;k>=j;k--) {
58 4503576 d = sx[k]-sx[k-j];
59
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
4503576 if (PetscUnlikely(d == 0.0)) sf[k] = 0.0; /* assume that the derivative is 0.0 and apply the Hermite interpolation */
60 2167836 else sf[k] = (sf[k]-sf[k-1]) / d;
61 }
62 409416 sa[j] = sf[j];
63 }
64
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
3544 PetscFunctionReturn(PETSC_SUCCESS);
65 }
66
67 /*
68 FILTLAN function HermiteBaseFilterInChebyshevBasis
69
70 compute a base filter P(z) which is a continuous, piecewise polynomial P(z) expanded
71 in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials in each interval
72
73 The base filter P(z) equals P_j(z) for z in the j-th interval [intv(j), intv(j+1)), where
74 P_j(z) a Hermite interpolating polynomial
75
76 input:
77 intv is a vector which defines the intervals; the j-th interval is [intv(j), intv(j+1))
78 HiLowFlags determines the shape of the base filter P(z)
79 Consider the j-th interval [intv(j), intv(j+1)]
80 HighLowFlag[j-1]==1, P(z)==1 for z in [intv(j), intv(j+1)]
81 ==0, P(z)==0 for z in [intv(j), intv(j+1)]
82 ==-1, [intv(j), intv(j+1)] is a transition interval;
83 P(intv(j)) and P(intv(j+1)) are defined such that P(z) is continuous
84 baseDeg is the degree of smoothness of the Hermite (piecewise polynomial) interpolation
85 to be precise, the i-th derivative of P(z) is zero, i.e. d^{i}P(z)/dz^i==0, at all interval
86 end points z=intv(j) for i=1,...,baseDeg
87
88 output:
89 P(z) expanded in a basis of `translated' (scale-and-shift) Chebyshev polynomials
90 to be precise, for z in the j-th interval [intv(j),intv(j+1)), P(z) equals
91 P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
92 where S_i(z) is the `translated' Chebyshev polynomial in that interval,
93 S_i((z-c)/h) = T_i(z), c = (intv(j)+intv(j+1))) / 2, h = (intv(j+1)-intv(j)) / 2,
94 with T_i(z) the Chebyshev polynomial of the first kind,
95 T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
96 the return matrix is the matrix of Chebyshev coefficients pp just described
97
98 note that the degree of P(z) in each interval is (at most) 2*baseDeg+1, with 2*baseDeg+2 coefficients
99 let n be the length of intv; then there are n-1 intervals
100 therefore the return matrix pp is of size (2*baseDeg+2)-by-(n-1)
101 */
102 9748 static PetscErrorCode FILTLAN_HermiteBaseFilterInChebyshevBasis(PetscReal *baseFilter,PetscReal *intv,PetscInt npoints,const PetscInt *HighLowFlags,PetscInt baseDeg)
103 {
104 9748 PetscInt m,ii,jj;
105 9748 PetscReal flag,flag0,flag2,aa,bb,*px,*py,*sx,*sy,*pp,*qq,*sq,*sbf,*work,*currentPoint = intv;
106 9748 const PetscInt *hilo = HighLowFlags;
107
108
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
9748 PetscFunctionBegin;
109 9748 m = 2*baseDeg+2;
110 9748 jj = npoints-1; /* jj is initialized as the number of intervals */
111
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
9748 PetscCall(PetscMalloc5(m,&px,m,&py,m,&pp,m,&qq,m,&work));
112 sbf = baseFilter;
113
114
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
58488 while (jj--) { /* the main loop to compute the Chebyshev coefficients */
115
116 48740 flag = (PetscReal)(*hilo++); /* get the flag of the current interval */
117
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
48740 if (flag == -1.0) { /* flag == -1 means that the current interval is a transition polynomial */
118
119 19496 flag2 = (PetscReal)(*hilo); /* get flag2, the flag of the next interval */
120 19496 flag0 = 1.0-flag2; /* the flag of the previous interval is 1-flag2 */
121
122 /* two pointers for traversing x[] and y[] */
123 19496 sx = px;
124 19496 sy = py;
125
126 /* find the current interval [aa,bb] */
127 19496 aa = *currentPoint++;
128 19496 bb = *currentPoint;
129
130 /* now left-hand side */
131 19496 ii = baseDeg+1;
132
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
233952 while (ii--) {
133 214456 *sy++ = flag0;
134 214456 *sx++ = aa;
135 }
136
137 /* now right-hand side */
138 ii = baseDeg+1;
139
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
233952 while (ii--) {
140 214456 *sy++ = flag2;
141 214456 *sx++ = bb;
142 }
143
144 /* build a Newton polynomial (indeed, the generalized Hermite interpolating polynomial) with x[] and y[] */
145
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
19496 PetscCall(FILTLAN_NewtonPolynomial(m,px,py,pp,work));
146
147 /* pp contains coefficients of the Newton polynomial P(z) in the current interval [aa,bb], where
148 P(z) = pp(1) + pp(2)*(z-px(1)) + pp(3)*(z-px(1))*(z-px(2)) + ... + pp(n)*(z-px(1))*...*(z-px(n-1)) */
149
150 /* translate the Newton coefficients to the Chebyshev coefficients */
151
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
19496 PetscCall(FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(m,aa,bb,pp,px,qq,work));
152 /* qq contains coefficients of the polynomial in [aa,bb] in the `translated' Chebyshev basis */
153
154 /* copy the Chebyshev coefficients to baseFilter
155 OCTAVE/MATLAB: B(:,j) = qq, where j = (npoints-1)-jj and B is the return matrix */
156 19496 sq = qq;
157 19496 ii = 2*baseDeg+2;
158
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
448408 while (ii--) *sbf++ = *sq++;
159
160 } else {
161
162 /* a constant polynomial P(z)=flag, where either flag==0 or flag==1
163 OCTAVE/MATLAB: B(1,j) = flag, where j = (npoints-1)-jj and B is the return matrix */
164 29244 *sbf++ = flag;
165
166 /* the other coefficients are all zero, since P(z) is a constant
167 OCTAVE/MATLAB: B(1,j) = 0, where j = (npoints-1)-jj and B is the return matrix */
168 29244 ii = 2*baseDeg+1;
169
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
643368 while (ii--) *sbf++ = 0.0;
170
171 /* for the next point */
172 29244 currentPoint++;
173 }
174 }
175
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
9748 PetscCall(PetscFree5(px,py,pp,qq,work));
176
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
1772 PetscFunctionReturn(PETSC_SUCCESS);
177 }
178
179 /* ////////////////////////////////////////////////////////////////////////////
180 // Base Filter
181 //////////////////////////////////////////////////////////////////////////// */
182
183 /*
184 FILTLAN function GetIntervals
185
186 this routine determines the intervals (including the transition one(s)) by an iterative process
187
188 frame is a vector consisting of 4 ordered elements:
189 [frame(1),frame(4)] is the interval which (tightly) contains all eigenvalues, and
190 [frame(2),frame(3)] is the interval in which the eigenvalues are sought
191 baseDeg is the left-and-right degree of the base filter for each interval
192 polyDeg is the (maximum possible) degree of s(z), with z*s(z) the polynomial filter
193 intv is the output; the j-th interval is [intv(j),intv(j+1))
194 opts is a collection of interval options
195
196 the base filter P(z) is a piecewise polynomial from Hermite interpolation with degree baseDeg
197 at each end point of intervals
198
199 the polynomial filter Q(z) is in the form z*s(z), i.e. Q(0)==0, such that ||(1-P(z))-z*s(z)||_w is
200 minimized with s(z) a polynomial of degree up to polyDeg
201
202 the resulting polynomial filter Q(z) satisfies Q(x)>=Q(y) for x in [frame[1],frame[2]], and
203 y in [frame[0],frame[3]] but not in [frame[1],frame[2]]
204
205 the routine fills a PolynomialFilterInfo struct which gives some information of the polynomial filter
206 */
207 78 static PetscErrorCode FILTLAN_GetIntervals(PetscReal *intervals,PetscReal *frame,PetscInt polyDeg,PetscInt baseDeg,FILTLAN_IOP opts,FILTLAN_PFI filterInfo)
208 {
209 78 PetscReal intv[6],x,y,z1,z2,c,c1,c2,fc,fc2,halfPlateau,leftDelta,rightDelta,gridSize;
210 78 PetscReal yLimit,ySummit,yLeftLimit,yRightLimit,bottom,qIndex,*baseFilter,*polyFilter;
211 78 PetscReal yLimitGap=0.0,yLeftSummit=0.0,yLeftBottom=0.0,yRightSummit=0.0,yRightBottom=0.0;
212 78 PetscInt i,ii,npoints,numIter,numLeftSteps=1,numRightSteps=1,numMoreLooked=0;
213 78 PetscBool leftBoundaryMet=PETSC_FALSE,rightBoundaryMet=PETSC_FALSE,stepLeft,stepRight;
214 78 const PetscReal a=frame[0],a1=frame[1],b1=frame[2],b=frame[3];
215 78 const PetscInt HighLowFlags[5] = { 1, -1, 0, -1, 1 }; /* if filterType is 1, only first 3 elements will be used */
216 78 const PetscInt numLookMore = 2*(PetscInt)(0.5+(PetscLogReal(2.0)/PetscLogReal(opts->shiftStepExpanRate)));
217
218
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
78 PetscFunctionBegin;
219
3/8
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 10 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 10 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
78 PetscCheck(a<=a1 && a1<=b1 && b1<=b,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Values in the frame vector should be non-decreasing");
220
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
78 PetscCheck(a1!=b1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"The range of wanted eigenvalues cannot be of size zero");
221 78 filterInfo->filterType = 2; /* mid-pass filter, for interior eigenvalues */
222
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
78 if (b == b1) {
223 PetscCheck(a!=a1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"A polynomial filter should not cover all eigenvalues");
224 filterInfo->filterType = 1; /* high-pass filter, for largest eigenvalues */
225
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
78 } else PetscCheck(a!=a1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"filterType==3 for smallest eigenvalues should be pre-converted to filterType==1 for largest eigenvalues");
226
227 /* the following recipe follows Yousef Saad (2005, 2006) with a few minor adaptations / enhancements */
228 78 halfPlateau = 0.5*(b1-a1)*opts->initialPlateau; /* half length of the "plateau" of the (dual) base filter */
229 78 leftDelta = (b1-a1)*opts->initialShiftStep; /* initial left shift */
230 78 rightDelta = leftDelta; /* initial right shift */
231 78 opts->numGridPoints = PetscMax(opts->numGridPoints,(PetscInt)(2.0*(b-a)/halfPlateau));
232 78 gridSize = (b-a) / (PetscReal)opts->numGridPoints;
233
234
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
546 for (i=0;i<6;i++) intv[i] = 0.0;
235
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
78 if (filterInfo->filterType == 2) { /* for interior eigenvalues */
236 78 npoints = 6;
237 78 intv[0] = a;
238 78 intv[5] = b;
239 /* intv[1], intv[2], intv[3], and intv[4] to be determined */
240 } else { /* filterType == 1 (or 3 with conversion), for extreme eigenvalues */
241 npoints = 4;
242 intv[0] = a;
243 intv[3] = b;
244 /* intv[1], and intv[2] to be determined */
245 }
246 78 z1 = a1 - leftDelta;
247 78 z2 = b1 + rightDelta;
248 78 filterInfo->filterOK = 0; /* not yet found any OK filter */
249
250 /* allocate matrices */
251
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
78 PetscCall(PetscMalloc2((polyDeg+2)*(npoints-1),&polyFilter,(2*baseDeg+2)*(npoints-1),&baseFilter));
252
253 /* initialize the intervals, mainly for the case opts->maxOuterIter == 0 */
254 78 intervals[0] = intv[0];
255 78 intervals[3] = intv[3];
256 78 intervals[5] = intv[5];
257 78 intervals[1] = z1;
258
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
78 if (filterInfo->filterType == 2) { /* a mid-pass filter for interior eigenvalues */
259 78 intervals[4] = z2;
260 78 c = (a1+b1) / 2.0;
261 78 intervals[2] = c - halfPlateau;
262 78 intervals[3] = c + halfPlateau;
263 } else { /* filterType == 1 (or 3 with conversion) for extreme eigenvalues */
264 intervals[2] = z1 + (b1-z1)*opts->transIntervalRatio;
265 }
266
267 /* the main loop */
268
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
790 for (numIter=1;numIter<=opts->maxOuterIter;numIter++) {
269
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
790 if (z1 <= a) { /* outer loop updates z1 and z2 */
270 180 z1 = a;
271 180 leftBoundaryMet = PETSC_TRUE;
272 }
273
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
790 if (filterInfo->filterType == 2) { /* a <= z1 < (a1) */
274
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
790 if (z2 >= b) { /* a mid-pass filter for interior eigenvalues */
275 z2 = b;
276 rightBoundaryMet = PETSC_TRUE;
277 }
278 /* a <= z1 < c-h < c+h < z2 <= b, where h is halfPlateau */
279 /* [z1, c-h] and [c+h, z2] are transition interval */
280 790 intv[1] = z1;
281 790 intv[4] = z2;
282 790 c1 = z1 + halfPlateau;
283 790 intv[2] = z1; /* i.e. c1 - halfPlateau */
284 790 intv[3] = c1 + halfPlateau;
285
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
790 PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg));
286
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
790 PetscCall(FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg));
287 /* fc1 = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1); */
288 790 c2 = z2 - halfPlateau;
289 790 intv[2] = c2 - halfPlateau;
290 790 intv[3] = z2; /* i.e. c2 + halfPlateau */
291
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
790 PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg));
292
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
790 PetscCall(FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg));
293 790 fc2 = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1);
294 790 yLimitGap = PETSC_MAX_REAL;
295 790 ii = opts->maxInnerIter;
296
3/4
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
8880 while (ii-- && !(yLimitGap <= opts->yLimitTol)) {
297 /* recursive bisection to get c such that p(a1) are p(b1) approximately the same */
298 8090 c = (c1+c2) / 2.0;
299 8090 intv[2] = c - halfPlateau;
300 8090 intv[3] = c + halfPlateau;
301
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
8090 PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg));
302
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
8090 PetscCall(FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg));
303 8090 fc = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1);
304
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
8090 if (fc*fc2 < 0.0) {
305 c1 = c;
306 /* fc1 = fc; */
307 } else {
308 4447 c2 = c;
309 4447 fc2 = fc;
310 }
311 8090 yLimitGap = PetscAbsReal(fc);
312 }
313 } else { /* filterType == 1 (or 3 with conversion) for extreme eigenvalues */
314 intv[1] = z1;
315 intv[2] = z1 + (b1-z1)*opts->transIntervalRatio;
316 PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,4,HighLowFlags,baseDeg));
317 PetscCall(FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,4,opts->intervalWeights,polyDeg));
318 }
319 /* polyFilter contains the coefficients of the polynomial filter which approximates phi(x)
320 expanded in the `translated' Chebyshev basis */
321 /* psi(x) = 1.0 - phi(x) is the dual base filter approximated by a polynomial in the form x*p(x) */
322 790 yLeftLimit = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1);
323 790 yRightLimit = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1);
324
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
790 yLimit = (yLeftLimit < yRightLimit) ? yLeftLimit : yRightLimit;
325
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
790 ySummit = (yLeftLimit > yRightLimit) ? yLeftLimit : yRightLimit;
326 x = a1;
327
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
31600 while ((x+=gridSize) < b1) {
328 30810 y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
329
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
30810 if (y < yLimit) yLimit = y;
330
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
30810 if (y > ySummit) ySummit = y;
331 }
332 /* now yLimit is the minimum of x*p(x) for x in [a1, b1] */
333 790 stepLeft = PETSC_FALSE;
334 790 stepRight = PETSC_FALSE;
335
6/6
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
✓ Branch 4 taken 10 times.
✓ Branch 5 taken 10 times.
790 if ((yLimit < yLeftLimit && yLimit < yRightLimit) || yLimit < opts->yBottomLine) {
336 /* very bad, step to see what will happen */
337 250 stepLeft = PETSC_TRUE;
338
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
250 if (filterInfo->filterType == 2) stepRight = PETSC_TRUE;
339
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
540 } else if (filterInfo->filterType == 2) {
340
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
540 if (yLeftLimit < yRightLimit) {
341
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
302 if (yRightLimit-yLeftLimit > opts->yLimitTol) stepLeft = PETSC_TRUE;
342
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
238 } else if (yLeftLimit-yRightLimit > opts->yLimitTol) stepRight = PETSC_TRUE;
343 }
344
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
790 if (!stepLeft) {
345 yLeftBottom = yLeftLimit;
346 x = a1;
347
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
28144 while ((x-=gridSize) >= a) {
348 28094 y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
349
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
28094 if (y < yLeftBottom) yLeftBottom = y;
350
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 2 times.
490 else if (y > yLeftBottom) break;
351 }
352 yLeftSummit = yLeftBottom;
353
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
1670316 while ((x-=gridSize) >= a) {
354 1669776 y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
355
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
1669776 if (y > yLeftSummit) {
356 23866 yLeftSummit = y;
357
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
23866 if (yLeftSummit > yLimit*opts->yRippleLimit) {
358 stepLeft = PETSC_TRUE;
359 break;
360 }
361 }
362
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
1669776 if (y < yLeftBottom) yLeftBottom = y;
363 }
364 }
365
3/4
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
790 if (filterInfo->filterType == 2 && !stepRight) {
366 yRightBottom = yRightLimit;
367 x = b1;
368
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
29748 while ((x+=gridSize) <= b) {
369 29748 y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
370
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
29748 if (y < yRightBottom) yRightBottom = y;
371
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 2 times.
540 else if (y > yRightBottom) break;
372 }
373 yRightSummit = yRightBottom;
374
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
1328372 while ((x+=gridSize) <= b) {
375 1327832 y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
376
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
1327832 if (y > yRightSummit) {
377 23970 yRightSummit = y;
378
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
23970 if (yRightSummit > yLimit*opts->yRippleLimit) {
379 stepRight = PETSC_TRUE;
380 break;
381 }
382 }
383
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
1327832 if (y < yRightBottom) yRightBottom = y;
384 }
385 }
386
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
790 if (!stepLeft && !stepRight) {
387
3/4
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
540 if (filterInfo->filterType == 2) bottom = PetscMin(yLeftBottom,yRightBottom);
388 else bottom = yLeftBottom;
389 540 qIndex = 1.0 - (yLimit-bottom) / (ySummit-bottom);
390
4/4
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
540 if (filterInfo->filterOK == 0 || filterInfo->filterQualityIndex < qIndex) {
391 /* found the first OK filter or a better filter */
392
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
1596 for (i=0;i<6;i++) intervals[i] = intv[i];
393 228 filterInfo->filterOK = 1;
394 228 filterInfo->filterQualityIndex = qIndex;
395 228 filterInfo->numIter = numIter;
396 228 filterInfo->yLimit = yLimit;
397 228 filterInfo->ySummit = ySummit;
398 228 filterInfo->numLeftSteps = numLeftSteps;
399 228 filterInfo->yLeftSummit = yLeftSummit;
400 228 filterInfo->yLeftBottom = yLeftBottom;
401
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
228 if (filterInfo->filterType == 2) {
402 228 filterInfo->yLimitGap = yLimitGap;
403 228 filterInfo->numRightSteps = numRightSteps;
404 228 filterInfo->yRightSummit = yRightSummit;
405 228 filterInfo->yRightBottom = yRightBottom;
406 }
407 numMoreLooked = 0;
408
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
312 } else if (++numMoreLooked == numLookMore) {
409 /* filter has been optimal */
410 78 filterInfo->filterOK = 2;
411 78 break;
412 }
413 /* try stepping further to see whether it can improve */
414 462 stepLeft = PETSC_TRUE;
415
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
462 if (filterInfo->filterType == 2) stepRight = PETSC_TRUE;
416 }
417 /* check whether we can really "step" */
418
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
712 if (leftBoundaryMet) {
419
2/4
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 10 times.
✗ Branch 3 not taken.
140 if (filterInfo->filterType == 1 || rightBoundaryMet) break; /* cannot step further, so break the loop */
420
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
140 if (stepLeft) {
421 /* cannot step left, so try stepping right */
422 140 stepLeft = PETSC_FALSE;
423 140 stepRight = PETSC_TRUE;
424 }
425 }
426
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
712 if (rightBoundaryMet && stepRight) {
427 /* cannot step right, so try stepping left */
428 stepRight = PETSC_FALSE;
429 stepLeft = PETSC_TRUE;
430 }
431 /* now "step" */
432
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
712 if (stepLeft) {
433 572 numLeftSteps++;
434
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
572 if (filterInfo->filterType == 2) leftDelta *= opts->shiftStepExpanRate; /* expand the step for faster convergence */
435 572 z1 -= leftDelta;
436 }
437
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
712 if (stepRight) {
438 712 numRightSteps++;
439 712 rightDelta *= opts->shiftStepExpanRate; /* expand the step for faster convergence */
440 712 z2 += rightDelta;
441 }
442
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
712 if (filterInfo->filterType == 2) {
443 /* shrink the "plateau" of the (dual) base filter */
444
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
712 if (stepLeft && stepRight) halfPlateau /= opts->plateauShrinkRate;
445 140 else halfPlateau /= PetscSqrtReal(opts->plateauShrinkRate);
446 }
447 }
448
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
78 PetscCheck(filterInfo->filterOK,PETSC_COMM_SELF,PETSC_ERR_CONV_FAILED,"STFILTER cannot get the filter specified; please adjust your filter parameters (e.g. increasing the polynomial degree)");
449
450 78 filterInfo->totalNumIter = numIter;
451
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
78 PetscCall(PetscFree2(polyFilter,baseFilter));
452
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
14 PetscFunctionReturn(PETSC_SUCCESS);
453 }
454
455 /* ////////////////////////////////////////////////////////////////////////////
456 // Chebyshev Polynomials
457 //////////////////////////////////////////////////////////////////////////// */
458
459 /*
460 FILTLAN function ExpandNewtonPolynomialInChebyshevBasis
461
462 translate the coefficients of a Newton polynomial to the coefficients
463 in a basis of the `translated' (scale-and-shift) Chebyshev polynomials
464
465 input:
466 a Newton polynomial defined by vectors a and x:
467 P(z) = a(1) + a(2)*(z-x(1)) + a(3)*(z-x(1))*(z-x(2)) + ... + a(n)*(z-x(1))*...*(z-x(n-1))
468 the interval [aa,bb] defines the `translated' Chebyshev polynomials S_i(z) = T_i((z-c)/h),
469 where c=(aa+bb)/2 and h=(bb-aa)/2, and T_i is the Chebyshev polynomial of the first kind
470 note that T_i is defined by T_0(z)=1, T_1(z)=z, and T_i(z)=2*z*T_{i-1}(z)+T_{i-2}(z) for i>=2
471
472 output:
473 a vector q containing the Chebyshev coefficients:
474 P(z) = q(1)*S_0(z) + q(2)*S_1(z) + ... + q(n)*S_{n-1}(z)
475 */
476 19496 static inline PetscErrorCode FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(PetscInt n,PetscReal aa,PetscReal bb,PetscReal *a,PetscReal *x,PetscReal *q,PetscReal *q2)
477 {
478 19496 PetscInt m,mm;
479 19496 PetscReal *sa,*sx,*sq,*sq2,c,c2,h,h2;
480
481
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
19496 PetscFunctionBegin;
482 19496 sa = a+n; /* pointers for traversing a and x */
483 19496 sx = x+n-1;
484 19496 *q = *--sa; /* set q[0] = a(n) */
485
486 19496 c = (aa+bb)/2.0;
487 19496 h = (bb-aa)/2.0;
488 19496 h2 = h/2.0;
489
490
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
428912 for (m=1;m<=n-1;m++) { /* the main loop for translation */
491
492 /* compute q2[0:m-1] = (c-x[n-m-1])*q[0:m-1] */
493 409416 mm = m;
494 409416 sq = q;
495 409416 sq2 = q2;
496 409416 c2 = c-(*--sx);
497
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
4912992 while (mm--) *(sq2++) = c2*(*sq++);
498 409416 *sq2 = 0.0; /* q2[m] = 0.0 */
499 409416 *(q2+1) += h*(*q); /* q2[1] = q2[1] + h*q[0] */
500
501 /* compute q2[0:m-2] = q2[0:m-2] + h2*q[1:m-1] */
502 409416 mm = m-1;
503 409416 sq2 = q2;
504 409416 sq = q+1;
505
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
4503576 while (mm--) *(sq2++) += h2*(*sq++);
506
507 /* compute q2[2:m] = q2[2:m] + h2*q[1:m-1] */
508 409416 mm = m-1;
509 409416 sq2 = q2+2;
510 409416 sq = q+1;
511
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
4503576 while (mm--) *(sq2++) += h2*(*sq++);
512
513 /* compute q[0:m] = q2[0:m] */
514 409416 mm = m+1;
515 409416 sq2 = q2;
516 409416 sq = q;
517
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
5322408 while (mm--) *sq++ = *sq2++;
518 409416 *q += (*--sa); /* q[0] = q[0] + p[n-m-1] */
519 }
520
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
19496 PetscFunctionReturn(PETSC_SUCCESS);
521 }
522
523 /*
524 FILTLAN function PolynomialEvaluationInChebyshevBasis
525
526 evaluate P(z) at z=z0, where P(z) is a polynomial expanded in a basis of
527 the `translated' (i.e. scale-and-shift) Chebyshev polynomials
528
529 input:
530 c is a vector of Chebyshev coefficients which defines the polynomial
531 P(z) = c(1)*S_0(z) + c(2)*S_1(z) + ... + c(n)*S_{n-1}(z),
532 where S_i is the `translated' Chebyshev polynomial S_i((z-c)/h) = T_i(z), with
533 c = (intv(j)+intv(j+1)) / 2, h = (intv(j+1)-intv(j)) / 2
534 note that T_i(z) is the Chebyshev polynomial of the first kind,
535 T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
536
537 output:
538 the evaluated value of P(z) at z=z0
539 */
540 3105600 static inline PetscReal FILTLAN_PolynomialEvaluationInChebyshevBasis(PetscReal *pp,PetscInt m,PetscInt idx,PetscReal z0,PetscReal aa,PetscReal bb)
541 {
542 3105600 PetscInt ii,deg1;
543 3105600 PetscReal y,zz,t0,t1,t2,*sc;
544
545
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
3105600 PetscFunctionBegin;
546 3105600 deg1 = m;
547
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
3105600 if (aa==-1.0 && bb==1.0) zz = z0; /* treat it as a special case to reduce rounding errors */
548
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
3105600 else zz = (aa==bb)? 0.0 : -1.0+2.0*(z0-aa)/(bb-aa);
549
550 /* compute y = P(z0), where we utilize the Chebyshev recursion */
551 3105600 sc = pp+(idx-1)*m; /* sc points to column idx of pp */
552 3105600 y = *sc++;
553 3105600 t1 = 1.0; t2 = zz;
554 3105600 ii = deg1-1;
555
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
616263700 while (ii--) {
556 /* Chebyshev recursion: T_0(zz)=1, T_1(zz)=zz, and T_{i+1}(zz) = 2*zz*T_i(zz) + T_{i-1}(zz) for i>=2
557 the values of T_{i+1}(zz), T_i(zz), T_{i-1}(zz) are stored in t0, t1, t2, respectively */
558 613158100 t0 = 2*zz*t1 - t2;
559 /* it also works for the base case / the first iteration, where t0 equals 2*zz*1-zz == zz which is T_1(zz) */
560 613158100 t2 = t1;
561 613158100 t1 = t0;
562 613158100 y += (*sc++)*t0;
563 }
564
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
3105600 PetscFunctionReturn(y);
565 }
566
567 #define basisTranslated PETSC_TRUE
568 /*
569 FILTLAN function PiecewisePolynomialEvaluationInChebyshevBasis
570
571 evaluate P(z) at z=z0, where P(z) is a piecewise polynomial expanded
572 in a basis of the (optionally translated, i.e. scale-and-shift) Chebyshev polynomials for each interval
573
574 input:
575 intv is a vector which defines the intervals; the j-th interval is [intv(j), intv(j+1))
576 pp is a matrix of Chebyshev coefficients which defines a piecewise polynomial P(z)
577 in a basis of the `translated' Chebyshev polynomials in each interval
578 the polynomial P_j(z) in the j-th interval, i.e. when z is in [intv(j), intv(j+1)), is defined by the j-th column of pp:
579 if basisTranslated == false, then
580 P_j(z) = pp(1,j)*T_0(z) + pp(2,j)*T_1(z) + ... + pp(n,j)*T_{n-1}(z),
581 where T_i(z) is the Chebyshev polynomial of the first kind,
582 T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
583 if basisTranslated == true, then
584 P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
585 where S_i is the `translated' Chebyshev polynomial S_i((z-c)/h) = T_i(z), with
586 c = (intv(j)+intv(j+1)) / 2, h = (intv(j+1)-intv(j)) / 2
587
588 output:
589 the evaluated value of P(z) at z=z0
590
591 note that if z0 falls below the first interval, then the polynomial in the first interval will be used to evaluate P(z0)
592 if z0 flies over the last interval, then the polynomial in the last interval will be used to evaluate P(z0)
593 */
594 3105600 static inline PetscReal FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(PetscReal *pp,PetscInt m,PetscReal *intv,PetscInt npoints,PetscReal z0)
595 {
596 3105600 PetscReal *sintv,aa,bb,resul;
597 3105600 PetscInt idx;
598
599
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
3105600 PetscFunctionBegin;
600 /* determine the interval which contains z0 */
601 3105600 sintv = &intv[1];
602 3105600 idx = 1;
603
3/4
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
3105600 if (npoints>2 && z0 >= *sintv) {
604 1414756 sintv++;
605
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
5525928 while (++idx < npoints-1) {
606
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
4178064 if (z0 < *sintv) break;
607 4111172 sintv++;
608 }
609 }
610 /* idx==1 if npoints<=2; otherwise idx satisfies:
611 intv(idx) <= z0 < intv(idx+1), if 2 <= idx <= npoints-2
612 z0 < intv(idx+1), if idx == 1
613 intv(idx) <= z0, if idx == npoints-1
614 in addition, sintv points to &intv(idx+1) */
615 3105600 if (basisTranslated) {
616 /* the basis consists of `translated' Chebyshev polynomials */
617 /* find the interval of concern, [aa,bb] */
618 3105600 aa = *(sintv-1);
619 3105600 bb = *sintv;
620 3105600 resul = FILTLAN_PolynomialEvaluationInChebyshevBasis(pp,m,idx,z0,aa,bb);
621 } else {
622 /* the basis consists of standard Chebyshev polynomials, with interval [-1.0,1.0] for integration */
623 resul = FILTLAN_PolynomialEvaluationInChebyshevBasis(pp,m,idx,z0,-1.0,1.0);
624 }
625
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
3105600 PetscFunctionReturn(resul);
626 }
627
628 /*
629 FILTLAN function PiecewisePolynomialInnerProductInChebyshevBasis
630
631 compute the weighted inner product of two piecewise polynomials expanded
632 in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials for each interval
633
634 pp and qq are two matrices of Chebyshev coefficients which define the piecewise polynomials P(z) and Q(z), respectively
635 for z in the j-th interval, P(z) equals
636 P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
637 and Q(z) equals
638 Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(n,j)*S_{n-1}(z),
639 where S_i(z) is the `translated' Chebyshev polynomial in that interval,
640 S_i((z-c)/h) = T_i(z), c = (aa+bb)) / 2, h = (bb-aa) / 2,
641 with T_i(z) the Chebyshev polynomial of the first kind
642 T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
643
644 the (scaled) j-th interval inner product is defined by
645 <P_j,Q_j> = (Pi/2)*(pp(1,j)*qq(1,j) + sum_{k} pp(k,j)*qq(k,j)),
646 which comes from the property
647 <T_0,T_0>=pi, <T_i,T_i>=pi/2 for i>=1, and <T_i,T_j>=0 for i!=j
648
649 the weighted inner product is <P,Q> = sum_{j} intervalWeights(j)*<P_j,Q_j>,
650 which is the return value
651
652 note that for unit weights, pass an empty vector of intervalWeights (i.e. of length 0)
653 */
654 6391605 static inline PetscReal FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(PetscReal *pp,PetscInt prows,PetscInt pcols,PetscInt ldp,PetscReal *qq,PetscInt qrows,PetscInt qcols,PetscInt ldq,const PetscReal *intervalWeights)
655 {
656 6391605 PetscInt nintv,deg1,i,k;
657 6391605 PetscReal *sp,*sq,ans=0.0,ans2;
658
659
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
6391605 PetscFunctionBegin;
660 6391605 deg1 = PetscMin(prows,qrows); /* number of effective coefficients, one more than the effective polynomial degree */
661
2/14
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
6391605 if (PetscUnlikely(!deg1)) PetscFunctionReturn(0.0);
662 6391605 nintv = PetscMin(pcols,qcols); /* number of intervals */
663
664 /* scaled by intervalWeights(i) in the i-th interval (we assume intervalWeights[] are always provided).
665 compute ans = sum_{i=1,...,nintv} intervalWeights(i)*[ pp(1,i)*qq(1,i) + sum_{k=1,...,deg} pp(k,i)*qq(k,i) ] */
666
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
38349630 for (i=0;i<nintv;i++) { /* compute ans2 = pp(1,i)*qq(1,i) + sum_{k=1,...,deg} pp(k,i)*qq(k,i) */
667 31958025 sp = pp+i*ldp;
668 31958025 sq = qq+i*ldq;
669 31958025 ans2 = (*sp) * (*sq); /* the first term pp(1,i)*qq(1,i) is being added twice */
670
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
1889817825 for (k=0;k<deg1;k++) ans2 += (*sp++)*(*sq++); /* add pp(k,i)*qq(k,i) */
671 31958025 ans += ans2*intervalWeights[i]; /* compute ans += ans2*intervalWeights(i) */
672 }
673
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
6391605 PetscFunctionReturn(ans*PETSC_PI/2.0);
674 }
675
676 /*
677 FILTLAN function PiecewisePolynomialInChebyshevBasisMultiplyX
678
679 compute Q(z) = z*P(z), where P(z) and Q(z) are piecewise polynomials expanded
680 in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials for each interval
681
682 P(z) and Q(z) are stored as matrices of Chebyshev coefficients pp and qq, respectively
683
684 For z in the j-th interval, P(z) equals
685 P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
686 and Q(z) equals
687 Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(n,j)*S_{n-1}(z),
688 where S_i(z) is the `translated' Chebyshev polynomial in that interval,
689 S_i((z-c)/h) = T_i(z), c = (intv(j)+intv(j+1))) / 2, h = (intv(j+1)-intv(j)) / 2,
690 with T_i(z) the Chebyshev polynomial of the first kind
691 T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
692
693 the returned matrix is qq which represents Q(z) = z*P(z)
694 */
695 2134605 static inline PetscErrorCode FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(PetscReal *pp,PetscInt deg1,PetscInt ldp,PetscReal *intv,PetscInt nintv,PetscReal *qq,PetscInt ldq)
696 {
697 2134605 PetscInt i,j;
698 2134605 PetscReal c,h,h2,tmp,*sp,*sq,*sp2,*sq2;
699
700
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
2134605 PetscFunctionBegin;
701
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
12807630 for (j=0;j<nintv;j++) { /* consider interval between intv(j) and intv(j+1) */
702 10673025 sp = pp+j*ldp;
703 10673025 sq = qq+j*ldq;
704 10673025 sp2 = sp;
705 10673025 sq2 = sq+1;
706 10673025 c = 0.5*(intv[j] + intv[j+1]); /* compute c = (intv(j) + intv(j+1))/2 */
707 10673025 h = 0.5*(intv[j+1] - intv[j]); /* compute h = (intv(j+1) - intv(j))/2 and h2 = h/2 */
708 10673025 h2 = 0.5*h;
709 10673025 i = deg1;
710
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
826715325 while (i--) *sq++ = c*(*sp++); /* compute q(1:deg1,j) = c*p(1:deg1,j) */
711 10673025 *sq++ = 0.0; /* set q(deg1+1,j) = 0.0 */
712 10673025 *(sq2++) += h*(*sp2++); /* compute q(2,j) = q(2,j) + h*p(1,j) */
713 10673025 i = deg1-1;
714
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
816042300 while (i--) { /* compute q(3:deg1+1,j) += h2*p(2:deg1,j) and then q(1:deg1-1,j) += h2*p(2:deg1,j) */
715 805369275 tmp = h2*(*sp2++);
716 805369275 *(sq2-2) += tmp;
717 805369275 *(sq2++) += tmp;
718 }
719 }
720
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
2134605 PetscFunctionReturn(PETSC_SUCCESS);
721 }
722
723 /* ////////////////////////////////////////////////////////////////////////////
724 // Conjugate Residual Method in the Polynomial Space
725 //////////////////////////////////////////////////////////////////////////// */
726
727 /*
728 B := alpha*A + beta*B
729
730 A,B are nxk
731 */
732 8494660 static inline PetscErrorCode Mat_AXPY_BLAS(PetscInt n,PetscInt k,PetscReal alpha,const PetscReal *A,PetscInt lda,PetscReal beta,PetscReal *B,PetscInt ldb)
733 {
734 8494660 PetscInt i,j;
735
736
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
8494660 PetscFunctionBegin;
737
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
8494660 if (beta==(PetscReal)1.0) {
738
4/4
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
1670879500 for (j=0;j<k;j++) for (i=0;i<n;i++) B[i+j*ldb] += alpha*A[i+j*lda];
739
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
4257000 PetscCall(PetscLogFlops(2.0*n*k));
740 } else {
741
4/4
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
1667789210 for (j=0;j<k;j++) for (i=0;i<n;i++) B[i+j*ldb] = alpha*A[i+j*lda] + beta*B[i+j*ldb];
742
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
4237660 PetscCall(PetscLogFlops(3.0*n*k));
743 }
744
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
8494660 PetscFunctionReturn(PETSC_SUCCESS);
745 }
746
747 /*
748 FILTLAN function FilteredConjugateResidualPolynomial
749
750 ** Conjugate Residual Method in the Polynomial Space
751
752 this routine employs a conjugate-residual-type algorithm in polynomial space to minimize ||P(z)-Q(z)||_w,
753 where P(z), the base filter, is the input piecewise polynomial, and
754 Q(z) is the output polynomial satisfying Q(0)==1, i.e. the constant term of Q(z) is 1
755 niter is the number of conjugate-residual iterations; therefore, the degree of Q(z) is up to niter+1
756 both P(z) and Q(z) are expanded in the `translated' (scale-and-shift) Chebyshev basis for each interval,
757 and presented as matrices of Chebyshev coefficients, denoted by pp and qq, respectively
758
759 input:
760 intv is a vector which defines the intervals; the j-th interval is [intv(j),intv(j+1))
761 w is a vector of Chebyshev weights; the weight of j-th interval is w(j)
762 the interval weights define the inner product of two continuous functions and then
763 the derived w-norm ||P(z)-Q(z)||_w
764 pp is a matrix of Chebyshev coefficients which defines the piecewise polynomial P(z)
765 to be specific, for z in [intv(j), intv(j+1)), P(z) equals
766 P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(niter+2,j)*S_{niter+1}(z),
767 where S_i(z) is the `translated' Chebyshev polynomial in that interval,
768 S_i((z-c)/h) = T_i(z), c = (intv(j)+intv(j+1))) / 2, h = (intv(j+1)-intv(j)) / 2,
769 with T_i(z) the Chebyshev polynomial of the first kind,
770 T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
771
772 output:
773 the return matrix, denoted by qq, represents a polynomial Q(z) with degree up to 1+niter
774 and satisfying Q(0)==1, such that ||P(z))-Q(z)||_w is minimized
775 this polynomial Q(z) is expanded in the `translated' Chebyshev basis for each interval
776 to be precise, considering z in [intv(j), intv(j+1)), Q(z) equals
777 Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(niter+2,j)*S_{niter+1}(z)
778
779 note:
780 1. since Q(0)==1, P(0)==1 is expected; if P(0)!=1, one can translate P(z)
781 for example, if P(0)==0, one can use 1-P(z) as input instead of P(z)
782 2. typically, the base filter, defined by pp and intv, is from Hermite interpolation
783 in intervals [intv(j),intv(j+1)) for j=1,...,nintv, with nintv the number of intervals
784 */
785 9670 static PetscErrorCode FILTLAN_FilteredConjugateResidualPolynomial(PetscReal *cpol,PetscReal *baseFilter,PetscInt nbase,PetscReal *intv,PetscInt m,PetscReal *intervalWeights,PetscInt niter)
786 {
787 9670 PetscInt i,j,srpol,scpol,sarpol,sppol,sappol,ld,nintv;
788 9670 PetscReal rho,rho0,rho1,den,bet,alp,alp0,*ppol,*rpol,*appol,*arpol;
789
790
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
9670 PetscFunctionBegin;
791 9670 nintv = m-1;
792 9670 ld = niter+2; /* leading dimension */
793
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
9670 PetscCall(PetscCalloc4(ld*nintv,&ppol,ld*nintv,&rpol,ld*nintv,&appol,ld*nintv,&arpol));
794
4/6
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
9670 PetscCall(PetscArrayzero(cpol,ld*nintv));
795 /* initialize polynomial ppol to be 1 (i.e. multiplicative identity) in all intervals */
796 58020 sppol = 2;
797 58020 srpol = 2;
798 58020 scpol = 2;
799
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
58020 for (j=0;j<nintv;j++) {
800 48350 ppol[j*ld] = 1.0;
801 48350 rpol[j*ld] = 1.0;
802 48350 cpol[j*ld] = 1.0;
803 }
804 /* ppol is the initial p-polynomial (corresponding to the A-conjugate vector p in CG)
805 rpol is the r-polynomial (corresponding to the residual vector r in CG)
806 cpol is the "corrected" residual polynomial (result of this function) */
807 9670 sappol = 3;
808 9670 sarpol = 3;
809
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
9670 PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(ppol,sppol,ld,intv,nintv,appol,ld));
810
4/4
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
183730 for (i=0;i<3;i++) for (j=0;j<nintv;j++) arpol[i+j*ld] = appol[i+j*ld];
811 9670 rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
812
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
1339258 for (i=0;i<niter;i++) {
813 1337500 den = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(appol,sappol,nintv,ld,appol,sappol,nintv,ld,intervalWeights);
814 1337500 alp0 = rho/den;
815 1337500 rho1 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(baseFilter,nbase,nintv,nbase,appol,sappol,nintv,ld,intervalWeights);
816 1337500 alp = (rho-rho1)/den;
817 1337500 srpol++;
818 1337500 scpol++;
819
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
1337500 PetscCall(Mat_AXPY_BLAS(srpol,nintv,-alp0,appol,ld,1.0,rpol,ld));
820
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
1337500 PetscCall(Mat_AXPY_BLAS(scpol,nintv,-alp,appol,ld,1.0,cpol,ld));
821
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
1337500 if (i+1 == niter) break;
822 1327830 sarpol++;
823
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
1327830 PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(rpol,srpol,ld,intv,nintv,arpol,ld));
824 1327830 rho0 = rho;
825 1327830 rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
826 1327830 bet = rho / rho0;
827 1327830 sppol++;
828 1327830 sappol++;
829
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
1327830 PetscCall(Mat_AXPY_BLAS(sppol,nintv,1.0,rpol,ld,bet,ppol,ld));
830
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
1327830 PetscCall(Mat_AXPY_BLAS(sappol,nintv,1.0,arpol,ld,bet,appol,ld));
831 }
832
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
9670 PetscCall(PetscFree4(ppol,rpol,appol,arpol));
833
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
1758 PetscFunctionReturn(PETSC_SUCCESS);
834 }
835
836 /*
837 FILTLAN function FilteredConjugateResidualMatrixPolynomialVectorProduct
838
839 this routine employs a conjugate-residual-type algorithm in polynomial space to compute
840 x = x0 + s(A)*r0 with r0 = b - A*x0, such that ||(1-P(z))-z*s(z)||_w is minimized, where
841 P(z) is a given piecewise polynomial, called the base filter,
842 s(z) is a polynomial of degree up to niter, the number of conjugate-residual iterations,
843 and b and x0 are given vectors
844
845 note that P(z) is expanded in the `translated' (scale-and-shift) Chebyshev basis for each interval,
846 and presented as a matrix of Chebyshev coefficients pp
847
848 input:
849 A is a sparse matrix
850 x0, b are vectors
851 niter is the number of conjugate-residual iterations
852 intv is a vector which defines the intervals; the j-th interval is [intv(j),intv(j+1))
853 w is a vector of Chebyshev weights; the weight of j-th interval is w(j)
854 the interval weights define the inner product of two continuous functions and then
855 the derived w-norm ||P(z)-Q(z)||_w
856 pp is a matrix of Chebyshev coefficients which defines the piecewise polynomial P(z)
857 to be specific, for z in [intv(j), intv(j+1)), P(z) equals
858 P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(niter+2,j)*S_{niter+1}(z),
859 where S_i(z) is the `translated' Chebyshev polynomial in that interval,
860 S_i((z-c)/h) = T_i(z), c = (intv(j)+intv(j+1))) / 2, h = (intv(j+1)-intv(j)) / 2,
861 with T_i(z) the Chebyshev polynomial of the first kind,
862 T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
863 tol is the tolerance; if the residual polynomial in z-norm is dropped by a factor lower
864 than tol, then stop the conjugate-residual iteration
865
866 output:
867 the return vector is x = x0 + s(A)*r0 with r0 = b - A*x0, such that ||(1-P(z))-z*s(z)||_w is minimized,
868 subject to that s(z) is a polynomial of degree up to niter, where P(z) is the base filter
869 in short, z*s(z) approximates 1-P(z)
870
871 note:
872 1. since z*s(z) approximates 1-P(z), P(0)==1 is expected; if P(0)!=1, one can translate P(z)
873 for example, if P(0)==0, one can use 1-P(z) as input instead of P(z)
874 2. typically, the base filter, defined by pp and intv, is from Hermite interpolation
875 in intervals [intv(j),intv(j+1)) for j=1,...,nintv, with nintv the number of intervals
876 3. a common application is to compute R(A)*b, where R(z) approximates 1-P(z)
877 in this case, one can set x0 = 0 and then the return vector is x = s(A)*b, where
878 z*s(z) approximates 1-P(z); therefore, A*x is the wanted R(A)*b
879 */
880 5748 static PetscErrorCode FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProduct(Mat A,Vec b,Vec x,PetscReal *baseFilter,PetscInt nbase,PetscReal *intv,PetscInt nintv,PetscReal *intervalWeights,PetscInt niter,Vec *work)
881 {
882 5748 PetscInt i,j,srpol,scpol,sarpol,sppol,sappol,ld;
883 5748 PetscReal rho,rho0,rho00,rho1,den,bet,alp,alp0,*cpol,*ppol,*rpol,*appol,*arpol,tol=0.0;
884 5748 Vec r=work[0],p=work[1],ap=work[2],w=work[3];
885 5748 PetscScalar alpha;
886
887
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
5748 PetscFunctionBegin;
888 5748 ld = niter+3; /* leading dimension */
889
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
5748 PetscCall(PetscCalloc5(ld*nintv,&ppol,ld*nintv,&rpol,ld*nintv,&cpol,ld*nintv,&appol,ld*nintv,&arpol));
890 /* initialize polynomial ppol to be 1 (i.e. multiplicative identity) in all intervals */
891 34488 sppol = 2;
892 34488 srpol = 2;
893 34488 scpol = 2;
894
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
34488 for (j=0;j<nintv;j++) {
895 28740 ppol[j*ld] = 1.0;
896 28740 rpol[j*ld] = 1.0;
897 28740 cpol[j*ld] = 1.0;
898 }
899 /* ppol is the initial p-polynomial (corresponding to the A-conjugate vector p in CG)
900 rpol is the r-polynomial (corresponding to the residual vector r in CG)
901 cpol is the "corrected" residual polynomial */
902 5748 sappol = 3;
903 5748 sarpol = 3;
904
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
5748 PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(ppol,sppol,ld,intv,nintv,appol,ld));
905
4/4
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
109212 for (i=0;i<3;i++) for (j=0;j<nintv;j++) arpol[i+j*ld] = appol[i+j*ld];
906 5748 rho00 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
907 5748 rho = rho00;
908
909 /* corrected CR in vector space */
910 /* we assume x0 is always 0 */
911
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
5748 PetscCall(VecSet(x,0.0));
912
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
5748 PetscCall(VecCopy(b,r)); /* initial residual r = b-A*x0 */
913
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
5748 PetscCall(VecCopy(r,p)); /* p = r */
914
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
5748 PetscCall(MatMult(A,p,ap)); /* ap = A*p */
915
916
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
725348 for (i=0;i<niter;i++) {
917 /* iteration in the polynomial space */
918 719600 den = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(appol,sappol,nintv,ld,appol,sappol,nintv,ld,intervalWeights);
919 719600 alp0 = rho/den;
920 719600 rho1 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(baseFilter,nbase,nintv,nbase,appol,sappol,nintv,ld,intervalWeights);
921 719600 alp = (rho-rho1)/den;
922 719600 srpol++;
923 719600 scpol++;
924
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
719600 PetscCall(Mat_AXPY_BLAS(srpol,nintv,-alp0,appol,ld,1.0,rpol,ld));
925
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
719600 PetscCall(Mat_AXPY_BLAS(scpol,nintv,-alp,appol,ld,1.0,cpol,ld));
926 719600 sarpol++;
927
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
719600 PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(rpol,srpol,ld,intv,nintv,arpol,ld));
928 719600 rho0 = rho;
929 719600 rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
930
931 /* update x in the vector space */
932 719600 alpha = alp;
933
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
719600 PetscCall(VecAXPY(x,alpha,p)); /* x += alp*p */
934
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
719600 if (rho < tol*rho00) break;
935
936 /* finish the iteration in the polynomial space */
937 719600 bet = rho / rho0;
938 719600 sppol++;
939 719600 sappol++;
940
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
719600 PetscCall(Mat_AXPY_BLAS(sppol,nintv,1.0,rpol,ld,bet,ppol,ld));
941
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
719600 PetscCall(Mat_AXPY_BLAS(sappol,nintv,1.0,arpol,ld,bet,appol,ld));
942
943 /* finish the iteration in the vector space */
944 719600 alpha = -alp0;
945
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
719600 PetscCall(VecAXPY(r,alpha,ap)); /* r -= alp0*ap */
946 719600 alpha = bet;
947
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
719600 PetscCall(VecAYPX(p,alpha,r)); /* p = r + bet*p */
948
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
719600 PetscCall(MatMult(A,r,w)); /* ap = A*r + bet*ap */
949
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
719600 PetscCall(VecAYPX(ap,alpha,w));
950 }
951
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
5748 PetscCall(PetscFree5(ppol,rpol,cpol,appol,arpol));
952
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
1084 PetscFunctionReturn(PETSC_SUCCESS);
953 }
954
955 /*
956 Gateway to FILTLAN for evaluating y=p(A)*x
957 */
958 5748 static PetscErrorCode MatMult_FILTLAN(Mat A,Vec x,Vec y)
959 {
960 5748 ST st;
961 5748 ST_FILTER *ctx;
962 5748 FILTLAN_CTX filtlan;
963 5748 PetscInt npoints;
964
965
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
5748 PetscFunctionBegin;
966
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
5748 PetscCall(MatShellGetContext(A,&st));
967 5748 ctx = (ST_FILTER*)st->data;
968 5748 filtlan = (FILTLAN_CTX)ctx->data;
969
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
5748 npoints = (filtlan->filterInfo->filterType == 2)? 6: 4;
970
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
5748 PetscCall(FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProduct(ctx->T,x,y,filtlan->baseFilter,2*filtlan->baseDegree+2,filtlan->intervals,npoints-1,filtlan->opts->intervalWeights,ctx->polyDegree,st->work));
971
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
5748 PetscCall(VecCopy(y,st->work[0]));
972
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
5748 PetscCall(MatMult(ctx->T,st->work[0],y));
973
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
1084 PetscFunctionReturn(PETSC_SUCCESS);
974 }
975
976 /* Block version of FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProduct */
977 357 static PetscErrorCode FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProductBlock(Mat A,Mat B,Mat C,PetscReal *baseFilter,PetscInt nbase,PetscReal *intv,PetscInt nintv,PetscReal *intervalWeights,PetscInt niter,Vec *work,Mat R,Mat P,Mat AP,Mat W)
978 {
979 357 PetscInt i,j,srpol,scpol,sarpol,sppol,sappol,ld;
980 357 PetscReal rho,rho0,rho00,rho1,den,bet,alp,alp0,*cpol,*ppol,*rpol,*appol,*arpol,tol=0.0;
981 357 PetscScalar alpha;
982
983
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
357 PetscFunctionBegin;
984 357 ld = niter+3; /* leading dimension */
985
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
357 PetscCall(PetscCalloc5(ld*nintv,&ppol,ld*nintv,&rpol,ld*nintv,&cpol,ld*nintv,&appol,ld*nintv,&arpol));
986 /* initialize polynomial ppol to be 1 (i.e. multiplicative identity) in all intervals */
987 2142 sppol = 2;
988 2142 srpol = 2;
989 2142 scpol = 2;
990
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
2142 for (j=0;j<nintv;j++) {
991 1785 ppol[j*ld] = 1.0;
992 1785 rpol[j*ld] = 1.0;
993 1785 cpol[j*ld] = 1.0;
994 }
995 /* ppol is the initial p-polynomial (corresponding to the A-conjugate vector p in CG)
996 rpol is the r-polynomial (corresponding to the residual vector r in CG)
997 cpol is the "corrected" residual polynomial */
998 357 sappol = 3;
999 357 sarpol = 3;
1000
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
357 PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(ppol,sppol,ld,intv,nintv,appol,ld));
1001
4/4
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
6783 for (i=0;i<3;i++) for (j=0;j<nintv;j++) arpol[i+j*ld] = appol[i+j*ld];
1002 357 rho00 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
1003 357 rho = rho00;
1004
1005 /* corrected CR in vector space */
1006 /* we assume x0 is always 0 */
1007
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
357 PetscCall(MatZeroEntries(C));
1008
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
357 PetscCall(MatCopy(B,R,SAME_NONZERO_PATTERN)); /* initial residual r = b-A*x0 */
1009
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
357 PetscCall(MatCopy(R,P,SAME_NONZERO_PATTERN)); /* p = r */
1010
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
357 PetscCall(MatMatMult(A,P,MAT_REUSE_MATRIX,PETSC_DEFAULT,&AP)); /* ap = A*p */
1011
1012
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
71757 for (i=0;i<niter;i++) {
1013 /* iteration in the polynomial space */
1014 71400 den = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(appol,sappol,nintv,ld,appol,sappol,nintv,ld,intervalWeights);
1015 71400 alp0 = rho/den;
1016 71400 rho1 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(baseFilter,nbase,nintv,nbase,appol,sappol,nintv,ld,intervalWeights);
1017 71400 alp = (rho-rho1)/den;
1018 71400 srpol++;
1019 71400 scpol++;
1020
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
71400 PetscCall(Mat_AXPY_BLAS(srpol,nintv,-alp0,appol,ld,1.0,rpol,ld));
1021
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
71400 PetscCall(Mat_AXPY_BLAS(scpol,nintv,-alp,appol,ld,1.0,cpol,ld));
1022 71400 sarpol++;
1023
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
71400 PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(rpol,srpol,ld,intv,nintv,arpol,ld));
1024 71400 rho0 = rho;
1025 71400 rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
1026
1027 /* update x in the vector space */
1028 71400 alpha = alp;
1029
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
71400 PetscCall(MatAXPY(C,alpha,P,SAME_NONZERO_PATTERN)); /* x += alp*p */
1030
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
71400 if (rho < tol*rho00) break;
1031
1032 /* finish the iteration in the polynomial space */
1033 71400 bet = rho / rho0;
1034 71400 sppol++;
1035 71400 sappol++;
1036
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
71400 PetscCall(Mat_AXPY_BLAS(sppol,nintv,1.0,rpol,ld,bet,ppol,ld));
1037
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
71400 PetscCall(Mat_AXPY_BLAS(sappol,nintv,1.0,arpol,ld,bet,appol,ld));
1038
1039 /* finish the iteration in the vector space */
1040 71400 alpha = -alp0;
1041
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
71400 PetscCall(MatAXPY(R,alpha,AP,SAME_NONZERO_PATTERN)); /* r -= alp0*ap */
1042 71400 alpha = bet;
1043
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
71400 PetscCall(MatAYPX(P,alpha,R,SAME_NONZERO_PATTERN)); /* p = r + bet*p */
1044
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
71400 PetscCall(MatMatMult(A,R,MAT_REUSE_MATRIX,PETSC_DEFAULT,&W)); /* ap = A*r + bet*ap */
1045
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
71400 PetscCall(MatAYPX(AP,alpha,W,SAME_NONZERO_PATTERN));
1046 }
1047
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
357 PetscCall(PetscFree5(ppol,rpol,cpol,appol,arpol));
1048
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
51 PetscFunctionReturn(PETSC_SUCCESS);
1049 }
1050
1051 /*
1052 Gateway to FILTLAN for evaluating C=p(A)*B
1053 */
1054 357 static PetscErrorCode MatMatMult_FILTLAN(Mat A,Mat B,Mat C,void *pctx)
1055 {
1056 357 ST st;
1057 357 ST_FILTER *ctx;
1058 357 FILTLAN_CTX filtlan;
1059 357 PetscInt i,m1,m2,npoints;
1060
1061
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
357 PetscFunctionBegin;
1062
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
357 PetscCall(MatShellGetContext(A,&st));
1063 357 ctx = (ST_FILTER*)st->data;
1064 357 filtlan = (FILTLAN_CTX)ctx->data;
1065
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
357 npoints = (filtlan->filterInfo->filterType == 2)? 6: 4;
1066
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
357 if (ctx->nW) { /* check if work matrices must be resized */
1067
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
343 PetscCall(MatGetSize(B,NULL,&m1));
1068
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
343 PetscCall(MatGetSize(ctx->W[0],NULL,&m2));
1069
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
343 if (m1!=m2) {
1070
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
42 PetscCall(MatDestroyMatrices(ctx->nW,&ctx->W));
1071 42 ctx->nW = 0;
1072 }
1073 }
1074
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
357 if (!ctx->nW) { /* allocate work matrices */
1075 56 ctx->nW = 4;
1076
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
56 PetscCall(PetscMalloc1(ctx->nW,&ctx->W));
1077
7/8
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 8 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 2 times.
280 for (i=0;i<ctx->nW;i++) PetscCall(MatDuplicate(B,MAT_DO_NOT_COPY_VALUES,&ctx->W[i]));
1078 }
1079
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
357 PetscCall(FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProductBlock(ctx->T,B,ctx->W[0],filtlan->baseFilter,2*filtlan->baseDegree+2,filtlan->intervals,npoints-1,filtlan->opts->intervalWeights,ctx->polyDegree,st->work,C,ctx->W[1],ctx->W[2],ctx->W[3]));
1080
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
357 PetscCall(MatMatMult(ctx->T,ctx->W[0],MAT_REUSE_MATRIX,PETSC_DEFAULT,&C));
1081
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
51 PetscFunctionReturn(PETSC_SUCCESS);
1082 }
1083
1084 /*
1085 FILTLAN function PolynomialFilterInterface::setFilter
1086
1087 Creates the shifted (and scaled) matrix and the base filter P(z).
1088 M is a shell matrix whose MatMult() applies the filter.
1089 */
1090 88 static PetscErrorCode STComputeOperator_Filter_FILTLAN(ST st,Mat *G)
1091 {
1092 88 ST_FILTER *ctx = (ST_FILTER*)st->data;
1093 88 FILTLAN_CTX filtlan = (FILTLAN_CTX)ctx->data;
1094 88 PetscInt i,npoints,n,m,N,M;
1095 88 PetscReal frame2[4];
1096 88 PetscScalar alpha;
1097 88 const PetscInt HighLowFlags[5] = { 1, -1, 0, -1, 1 };
1098
1099
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
88 PetscFunctionBegin;
1100
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
88 PetscCall(STSetWorkVecs(st,4));
1101 88 filtlan->frame[0] = ctx->left;
1102 88 filtlan->frame[1] = ctx->inta;
1103 88 filtlan->frame[2] = ctx->intb;
1104 88 filtlan->frame[3] = ctx->right;
1105
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
88 if (filtlan->frame[0] == filtlan->frame[1]) { /* low pass filter, convert it to high pass filter */
1106 /* T = frame[3]*eye(n) - A */
1107 PetscCall(MatDestroy(&ctx->T));
1108 PetscCall(MatDuplicate(st->A[0],MAT_COPY_VALUES,&ctx->T));
1109 PetscCall(MatScale(ctx->T,-1.0));
1110 alpha = filtlan->frame[3];
1111 PetscCall(MatShift(ctx->T,alpha));
1112 for (i=0;i<4;i++) frame2[i] = filtlan->frame[3] - filtlan->frame[3-i];
1113 } else { /* it can be a mid-pass filter or a high-pass filter */
1114
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
88 if (filtlan->frame[0] == 0.0) {
1115 PetscCall(PetscObjectReference((PetscObject)st->A[0]));
1116 PetscCall(MatDestroy(&ctx->T));
1117 ctx->T = st->A[0];
1118 for (i=0;i<4;i++) frame2[i] = filtlan->frame[i];
1119 } else {
1120 /* T = A - frame[0]*eye(n) */
1121
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
88 PetscCall(MatDestroy(&ctx->T));
1122
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
88 PetscCall(MatDuplicate(st->A[0],MAT_COPY_VALUES,&ctx->T));
1123 88 alpha = -filtlan->frame[0];
1124
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
88 PetscCall(MatShift(ctx->T,alpha));
1125
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
440 for (i=0;i<4;i++) frame2[i] = filtlan->frame[i] - filtlan->frame[0];
1126 }
1127 }
1128
1129 /* no need to recompute filter if the parameters did not change */
1130
3/4
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 10 times.
88 if (st->state==ST_STATE_INITIAL || ctx->filtch) {
1131
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
78 PetscCall(FILTLAN_GetIntervals(filtlan->intervals,frame2,ctx->polyDegree,filtlan->baseDegree,filtlan->opts,filtlan->filterInfo));
1132 /* translate the intervals back */
1133
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
78 if (filtlan->frame[0] == filtlan->frame[1]) { /* low pass filter, convert it to high pass filter */
1134 for (i=0;i<4;i++) filtlan->intervals2[i] = filtlan->frame[3] - filtlan->intervals[3-i];
1135 } else { /* it can be a mid-pass filter or a high-pass filter */
1136
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
78 if (filtlan->frame[0] == 0.0) {
1137 for (i=0;i<6;i++) filtlan->intervals2[i] = filtlan->intervals[i];
1138 } else {
1139
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
546 for (i=0;i<6;i++) filtlan->intervals2[i] = filtlan->intervals[i] + filtlan->frame[0];
1140 }
1141 }
1142
1143
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
78 npoints = (filtlan->filterInfo->filterType == 2)? 6: 4;
1144
5/8
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
78 PetscCall(PetscFree(filtlan->baseFilter));
1145
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
78 PetscCall(PetscMalloc1((2*filtlan->baseDegree+2)*(npoints-1),&filtlan->baseFilter));
1146
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
78 PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(filtlan->baseFilter,filtlan->intervals,npoints,HighLowFlags,filtlan->baseDegree));
1147
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
78 PetscCall(PetscInfo(st,"Computed value of yLimit = %g\n",(double)filtlan->filterInfo->yLimit));
1148 }
1149 88 ctx->filtch = PETSC_FALSE;
1150
1151 /* create shell matrix*/
1152
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
88 if (!*G) {
1153
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
78 PetscCall(MatGetSize(ctx->T,&N,&M));
1154
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
78 PetscCall(MatGetLocalSize(ctx->T,&n,&m));
1155
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
78 PetscCall(MatCreateShell(PetscObjectComm((PetscObject)st),n,m,N,M,st,G));
1156
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
78 PetscCall(MatShellSetOperation(*G,MATOP_MULT,(PetscErrorCodeFn*)MatMult_FILTLAN));
1157
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
78 PetscCall(MatShellSetMatProductOperation(*G,MATPRODUCT_AB,NULL,MatMatMult_FILTLAN,NULL,MATDENSE,MATDENSE));
1158
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
78 PetscCall(MatShellSetMatProductOperation(*G,MATPRODUCT_AB,NULL,MatMatMult_FILTLAN,NULL,MATDENSECUDA,MATDENSECUDA));
1159
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
78 PetscCall(MatShellSetMatProductOperation(*G,MATPRODUCT_AB,NULL,MatMatMult_FILTLAN,NULL,MATDENSEHIP,MATDENSEHIP));
1160 }
1161
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
16 PetscFunctionReturn(PETSC_SUCCESS);
1162 }
1163
1164 287 static PetscErrorCode STFilterGetThreshold_Filter_FILTLAN(ST st,PetscReal *gamma)
1165 {
1166 287 ST_FILTER *ctx = (ST_FILTER*)st->data;
1167 287 FILTLAN_CTX filtlan = (FILTLAN_CTX)ctx->data;
1168
1169
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
287 PetscFunctionBegin;
1170 287 *gamma = filtlan->filterInfo->yLimit;
1171
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
287 PetscFunctionReturn(PETSC_SUCCESS);
1172 }
1173
1174 68 static PetscErrorCode STDestroy_Filter_FILTLAN(ST st)
1175 {
1176 68 ST_FILTER *ctx = (ST_FILTER*)st->data;
1177 68 FILTLAN_CTX filtlan = (FILTLAN_CTX)ctx->data;
1178
1179
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
68 PetscFunctionBegin;
1180
5/8
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
68 PetscCall(PetscFree(filtlan->opts));
1181
5/8
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
68 PetscCall(PetscFree(filtlan->filterInfo));
1182
5/8
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
68 PetscCall(PetscFree(filtlan->baseFilter));
1183
6/8
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
68 PetscCall(PetscFree(filtlan));
1184
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
12 PetscFunctionReturn(PETSC_SUCCESS);
1185 }
1186
1187 68 PetscErrorCode STCreate_Filter_FILTLAN(ST st)
1188 {
1189 68 ST_FILTER *ctx = (ST_FILTER*)st->data;
1190 68 FILTLAN_CTX filtlan;
1191 68 FILTLAN_IOP iop;
1192 68 FILTLAN_PFI pfi;
1193
1194
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
68 PetscFunctionBegin;
1195
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
68 PetscCall(PetscNew(&filtlan));
1196 68 ctx->data = (void*)filtlan;
1197 68 filtlan->baseDegree = 10;
1198
1199
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
68 PetscCall(PetscNew(&iop));
1200 68 filtlan->opts = iop;
1201 68 iop->intervalWeights[0] = 100.0;
1202 68 iop->intervalWeights[1] = 1.0;
1203 68 iop->intervalWeights[2] = 1.0;
1204 68 iop->intervalWeights[3] = 1.0;
1205 68 iop->intervalWeights[4] = 100.0;
1206 68 iop->transIntervalRatio = 0.6;
1207 68 iop->reverseInterval = PETSC_FALSE;
1208 68 iop->initialPlateau = 0.1;
1209 68 iop->plateauShrinkRate = 1.5;
1210 68 iop->initialShiftStep = 0.01;
1211 68 iop->shiftStepExpanRate = 1.5;
1212 68 iop->maxInnerIter = 30;
1213 68 iop->yLimitTol = 0.0001;
1214 68 iop->maxOuterIter = 50;
1215 68 iop->numGridPoints = 200;
1216 68 iop->yBottomLine = 0.001;
1217 68 iop->yRippleLimit = 0.8;
1218
1219
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
68 PetscCall(PetscNew(&pfi));
1220 68 filtlan->filterInfo = pfi;
1221
1222 68 ctx->computeoperator = STComputeOperator_Filter_FILTLAN;
1223 68 ctx->getthreshold = STFilterGetThreshold_Filter_FILTLAN;
1224 68 ctx->destroy = STDestroy_Filter_FILTLAN;
1225
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
68 PetscFunctionReturn(PETSC_SUCCESS);
1226 }
1227