GCC Code Coverage Report


Directory: ./
File: src/sys/classes/st/impls/filter/filtlan.c
Date: 2026-02-22 03:58:10
Exec Total Coverage
Lines: 585 609 96.1%
Functions: 18 18 100.0%
Branches: 716 1158 61.8%

Line Branch Exec Source
1 /*
2 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3 SLEPc - Scalable Library for Eigenvalue Problem Computations
4 Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
5
6 This file is part of SLEPc.
7 SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9 */
10 /*
11 This file is an adaptation of several subroutines from FILTLAN, the
12 Filtered Lanczos Package, authored by Haw-ren Fang and Yousef Saad.
13
14 More information at:
15 https://www-users.cs.umn.edu/~saad/software/filtlan
16
17 References:
18
19 [1] H. Fang and Y. Saad, "A filtered Lanczos procedure for extreme and interior
20 eigenvalue problems", SIAM J. Sci. Comput. 34(4):A2220-A2246, 2012.
21 */
22
23 #include <slepc/private/stimpl.h>
24 #include "filter.h"
25
26 static PetscErrorCode FILTLAN_FilteredConjugateResidualPolynomial(PetscReal*,PetscReal*,PetscInt,PetscReal*,PetscInt,PetscReal*,PetscInt);
27 static PetscReal FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(PetscReal*,PetscInt,PetscReal*,PetscInt,PetscReal);
28 static PetscErrorCode FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(PetscInt,PetscReal,PetscReal,PetscReal*,PetscReal*,PetscReal*,PetscReal*);
29
30 /* ////////////////////////////////////////////////////////////////////////////
31 // Newton - Hermite Polynomial Interpolation
32 //////////////////////////////////////////////////////////////////////////// */
33
34 /*
35 FILTLAN function NewtonPolynomial
36
37 build P(z) by Newton's divided differences in the form
38 P(z) = a(1) + a(2)*(z-x(1)) + a(3)*(z-x(1))*(z-x(2)) + ... + a(n)*(z-x(1))*...*(z-x(n-1)),
39 such that P(x(i)) = y(i) for i=1,...,n, where
40 x,y are input vectors of length n, and a is the output vector of length n
41 if x(i)==x(j) for some i!=j, then it is assumed that the derivative of P(z) is to be zero at x(i),
42 and the Hermite polynomial interpolation is applied
43 in general, if there are k x(i)'s with the same value x0, then
44 the j-th order derivative of P(z) is zero at z=x0 for j=1,...,k-1
45 */
46 15064 static inline PetscErrorCode FILTLAN_NewtonPolynomial(PetscInt n,PetscReal *x,PetscReal *y,PetscReal *sa,PetscReal *sf)
47 {
48 15064 PetscReal d,*sx=x,*sy=y;
49 15064 PetscInt j,k;
50
51
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
15064 PetscFunctionBegin;
52
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
15064 PetscCall(PetscArraycpy(sf,sy,n));
53
54 /* apply Newton's finite difference method */
55 15064 sa[0] = sf[0];
56
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
331408 for (j=1;j<n;j++) {
57
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
3796128 for (k=n-1;k>=j;k--) {
58 3479784 d = sx[k]-sx[k-j];
59
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
3479784 if (PetscUnlikely(d == 0.0)) sf[k] = 0.0; /* assume that the derivative is 0.0 and apply the Hermite interpolation */
60 1675608 else sf[k] = (sf[k]-sf[k-1]) / d;
61 }
62 316344 sa[j] = sf[j];
63 }
64
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
3544 PetscFunctionReturn(PETSC_SUCCESS);
65 }
66
67 /*
68 FILTLAN function HermiteBaseFilterInChebyshevBasis
69
70 compute a base filter P(z) which is a continuous, piecewise polynomial P(z) expanded
71 in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials in each interval
72
73 The base filter P(z) equals P_j(z) for z in the j-th interval [intv(j), intv(j+1)), where
74 P_j(z) a Hermite interpolating polynomial
75
76 input:
77 intv is a vector which defines the intervals; the j-th interval is [intv(j), intv(j+1))
78 HiLowFlags determines the shape of the base filter P(z)
79 Consider the j-th interval [intv(j), intv(j+1)]
80 HighLowFlag[j-1]==1, P(z)==1 for z in [intv(j), intv(j+1)]
81 ==0, P(z)==0 for z in [intv(j), intv(j+1)]
82 ==-1, [intv(j), intv(j+1)] is a transition interval;
83 P(intv(j)) and P(intv(j+1)) are defined such that P(z) is continuous
84 baseDeg is the degree of smoothness of the Hermite (piecewise polynomial) interpolation
85 to be precise, the i-th derivative of P(z) is zero, i.e. d^{i}P(z)/dz^i==0, at all interval
86 end points z=intv(j) for i=1,...,baseDeg
87
88 output:
89 P(z) expanded in a basis of `translated' (scale-and-shift) Chebyshev polynomials
90 to be precise, for z in the j-th interval [intv(j),intv(j+1)), P(z) equals
91 P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
92 where S_i(z) is the `translated' Chebyshev polynomial in that interval,
93 S_i((z-c)/h) = T_i(z), c = (intv(j)+intv(j+1))) / 2, h = (intv(j+1)-intv(j)) / 2,
94 with T_i(z) the Chebyshev polynomial of the first kind,
95 T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
96 the return matrix is the matrix of Chebyshev coefficients pp just described
97
98 note that the degree of P(z) in each interval is (at most) 2*baseDeg+1, with 2*baseDeg+2 coefficients
99 let n be the length of intv; then there are n-1 intervals
100 therefore the return matrix pp is of size (2*baseDeg+2)-by-(n-1)
101 */
102 7532 static PetscErrorCode FILTLAN_HermiteBaseFilterInChebyshevBasis(PetscReal *baseFilter,PetscReal *intv,PetscInt npoints,const PetscInt *HighLowFlags,PetscInt baseDeg)
103 {
104 7532 PetscInt m,ii,jj;
105 7532 PetscReal flag,flag0,flag2,aa,bb,*px,*py,*sx,*sy,*pp,*qq,*sq,*sbf,*work,*currentPoint = intv;
106 7532 const PetscInt *hilo = HighLowFlags;
107
108
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
7532 PetscFunctionBegin;
109 7532 m = 2*baseDeg+2;
110 7532 jj = npoints-1; /* jj is initialized as the number of intervals */
111
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
7532 PetscCall(PetscMalloc5(m,&px,m,&py,m,&pp,m,&qq,m,&work));
112 sbf = baseFilter;
113
114
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
45192 while (jj--) { /* the main loop to compute the Chebyshev coefficients */
115
116 37660 flag = (PetscReal)(*hilo++); /* get the flag of the current interval */
117
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
37660 if (flag == -1.0) { /* flag == -1 means that the current interval is a transition polynomial */
118
119 15064 flag2 = (PetscReal)(*hilo); /* get flag2, the flag of the next interval */
120 15064 flag0 = 1.0-flag2; /* the flag of the previous interval is 1-flag2 */
121
122 /* two pointers for traversing x[] and y[] */
123 15064 sx = px;
124 15064 sy = py;
125
126 /* find the current interval [aa,bb] */
127 15064 aa = *currentPoint++;
128 15064 bb = *currentPoint;
129
130 /* now left-hand side */
131 15064 ii = baseDeg+1;
132
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
180768 while (ii--) {
133 165704 *sy++ = flag0;
134 165704 *sx++ = aa;
135 }
136
137 /* now right-hand side */
138 ii = baseDeg+1;
139
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
180768 while (ii--) {
140 165704 *sy++ = flag2;
141 165704 *sx++ = bb;
142 }
143
144 /* build a Newton polynomial (indeed, the generalized Hermite interpolating polynomial) with x[] and y[] */
145
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
15064 PetscCall(FILTLAN_NewtonPolynomial(m,px,py,pp,work));
146
147 /* pp contains coefficients of the Newton polynomial P(z) in the current interval [aa,bb], where
148 P(z) = pp(1) + pp(2)*(z-px(1)) + pp(3)*(z-px(1))*(z-px(2)) + ... + pp(n)*(z-px(1))*...*(z-px(n-1)) */
149
150 /* translate the Newton coefficients to the Chebyshev coefficients */
151
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
15064 PetscCall(FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(m,aa,bb,pp,px,qq,work));
152 /* qq contains coefficients of the polynomial in [aa,bb] in the `translated' Chebyshev basis */
153
154 /* copy the Chebyshev coefficients to baseFilter
155 OCTAVE/MATLAB: B(:,j) = qq, where j = (npoints-1)-jj and B is the return matrix */
156 15064 sq = qq;
157 15064 ii = 2*baseDeg+2;
158
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
346472 while (ii--) *sbf++ = *sq++;
159
160 } else {
161
162 /* a constant polynomial P(z)=flag, where either flag==0 or flag==1
163 OCTAVE/MATLAB: B(1,j) = flag, where j = (npoints-1)-jj and B is the return matrix */
164 22596 *sbf++ = flag;
165
166 /* the other coefficients are all zero, since P(z) is a constant
167 OCTAVE/MATLAB: B(1,j) = 0, where j = (npoints-1)-jj and B is the return matrix */
168 22596 ii = 2*baseDeg+1;
169
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
497112 while (ii--) *sbf++ = 0.0;
170
171 /* for the next point */
172 22596 currentPoint++;
173 }
174 }
175
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
7532 PetscCall(PetscFree5(px,py,pp,qq,work));
176
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
1772 PetscFunctionReturn(PETSC_SUCCESS);
177 }
178
179 /* ////////////////////////////////////////////////////////////////////////////
180 // Base Filter
181 //////////////////////////////////////////////////////////////////////////// */
182
183 /*
184 FILTLAN function GetIntervals
185
186 this routine determines the intervals (including the transition one(s)) by an iterative process
187
188 frame is a vector consisting of 4 ordered elements:
189 [frame(1),frame(4)] is the interval which (tightly) contains all eigenvalues, and
190 [frame(2),frame(3)] is the interval in which the eigenvalues are sought
191 baseDeg is the left-and-right degree of the base filter for each interval
192 polyDeg is the (maximum possible) degree of s(z), with z*s(z) the polynomial filter
193 intv is the output; the j-th interval is [intv(j),intv(j+1))
194 opts is a collection of interval options
195
196 the base filter P(z) is a piecewise polynomial from Hermite interpolation with degree baseDeg
197 at each end point of intervals
198
199 the polynomial filter Q(z) is in the form z*s(z), i.e. Q(0)==0, such that ||(1-P(z))-z*s(z)||_w is
200 minimized with s(z) a polynomial of degree up to polyDeg
201
202 the resulting polynomial filter Q(z) satisfies Q(x)>=Q(y) for x in [frame[1],frame[2]], and
203 y in [frame[0],frame[3]] but not in [frame[1],frame[2]]
204
205 the routine fills a PolynomialFilterInfo struct which gives some information of the polynomial filter
206 */
207 60 static PetscErrorCode FILTLAN_GetIntervals(PetscReal *intervals,PetscReal *frame,PetscInt polyDeg,PetscInt baseDeg,FILTLAN_IOP opts,FILTLAN_PFI filterInfo)
208 {
209 60 PetscReal intv[6],x,y,z1,z2,c,c1,c2,fc,fc2,halfPlateau,leftDelta,rightDelta,gridSize;
210 60 PetscReal yLimit,ySummit,yLeftLimit,yRightLimit,bottom,qIndex,*baseFilter,*polyFilter;
211 60 PetscReal yLimitGap=0.0,yLeftSummit=0.0,yLeftBottom=0.0,yRightSummit=0.0,yRightBottom=0.0;
212 60 PetscInt i,ii,npoints,numIter,numLeftSteps=1,numRightSteps=1,numMoreLooked=0;
213 60 PetscBool leftBoundaryMet=PETSC_FALSE,rightBoundaryMet=PETSC_FALSE,stepLeft,stepRight;
214 60 const PetscReal a=frame[0],a1=frame[1],b1=frame[2],b=frame[3];
215 60 const PetscInt HighLowFlags[5] = { 1, -1, 0, -1, 1 }; /* if filterType is 1, only first 3 elements will be used */
216 60 const PetscInt numLookMore = 2*(PetscInt)(0.5+(PetscLogReal(2.0)/PetscLogReal(opts->shiftStepExpanRate)));
217
218
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
60 PetscFunctionBegin;
219
3/8
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 8 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 8 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
60 PetscCheck(a<=a1 && a1<=b1 && b1<=b,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Values in the frame vector should be non-decreasing");
220
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
60 PetscCheck(a1!=b1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"The range of wanted eigenvalues cannot be of size zero");
221 60 filterInfo->filterType = 2; /* mid-pass filter, for interior eigenvalues */
222
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
60 if (b == b1) {
223 PetscCheck(a!=a1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"A polynomial filter should not cover all eigenvalues");
224 filterInfo->filterType = 1; /* high-pass filter, for largest eigenvalues */
225
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
60 } else PetscCheck(a!=a1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"filterType==3 for smallest eigenvalues should be pre-converted to filterType==1 for largest eigenvalues");
226
227 /* the following recipe follows Yousef Saad (2005, 2006) with a few minor adaptations / enhancements */
228 60 halfPlateau = 0.5*(b1-a1)*opts->initialPlateau; /* half length of the "plateau" of the (dual) base filter */
229 60 leftDelta = (b1-a1)*opts->initialShiftStep; /* initial left shift */
230 60 rightDelta = leftDelta; /* initial right shift */
231 60 opts->numGridPoints = PetscMax(opts->numGridPoints,(PetscInt)(2.0*(b-a)/halfPlateau));
232 60 gridSize = (b-a) / (PetscReal)opts->numGridPoints;
233
234
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
420 for (i=0;i<6;i++) intv[i] = 0.0;
235
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
60 if (filterInfo->filterType == 2) { /* for interior eigenvalues */
236 60 npoints = 6;
237 60 intv[0] = a;
238 60 intv[5] = b;
239 /* intv[1], intv[2], intv[3], and intv[4] to be determined */
240 } else { /* filterType == 1 (or 3 with conversion), for extreme eigenvalues */
241 npoints = 4;
242 intv[0] = a;
243 intv[3] = b;
244 /* intv[1], and intv[2] to be determined */
245 }
246 60 z1 = a1 - leftDelta;
247 60 z2 = b1 + rightDelta;
248 60 filterInfo->filterOK = 0; /* not yet found any OK filter */
249
250 /* allocate matrices */
251
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
60 PetscCall(PetscMalloc2((polyDeg+2)*(npoints-1),&polyFilter,(2*baseDeg+2)*(npoints-1),&baseFilter));
252
253 /* initialize the intervals, mainly for the case opts->maxOuterIter == 0 */
254 60 intervals[0] = intv[0];
255 60 intervals[3] = intv[3];
256 60 intervals[5] = intv[5];
257 60 intervals[1] = z1;
258
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
60 if (filterInfo->filterType == 2) { /* a mid-pass filter for interior eigenvalues */
259 60 intervals[4] = z2;
260 60 c = (a1+b1) / 2.0;
261 60 intervals[2] = c - halfPlateau;
262 60 intervals[3] = c + halfPlateau;
263 } else { /* filterType == 1 (or 3 with conversion) for extreme eigenvalues */
264 intervals[2] = z1 + (b1-z1)*opts->transIntervalRatio;
265 }
266
267 /* the main loop */
268
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
608 for (numIter=1;numIter<=opts->maxOuterIter;numIter++) {
269
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
608 if (z1 <= a) { /* outer loop updates z1 and z2 */
270 144 z1 = a;
271 144 leftBoundaryMet = PETSC_TRUE;
272 }
273
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
608 if (filterInfo->filterType == 2) { /* a <= z1 < (a1) */
274
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
608 if (z2 >= b) { /* a mid-pass filter for interior eigenvalues */
275 z2 = b;
276 rightBoundaryMet = PETSC_TRUE;
277 }
278 /* a <= z1 < c-h < c+h < z2 <= b, where h is halfPlateau */
279 /* [z1, c-h] and [c+h, z2] are transition interval */
280 608 intv[1] = z1;
281 608 intv[4] = z2;
282 608 c1 = z1 + halfPlateau;
283 608 intv[2] = z1; /* i.e. c1 - halfPlateau */
284 608 intv[3] = c1 + halfPlateau;
285
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
608 PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg));
286
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
608 PetscCall(FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg));
287 /* fc1 = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1); */
288 608 c2 = z2 - halfPlateau;
289 608 intv[2] = c2 - halfPlateau;
290 608 intv[3] = z2; /* i.e. c2 + halfPlateau */
291
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
608 PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg));
292
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
608 PetscCall(FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg));
293 608 fc2 = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1);
294 608 yLimitGap = PETSC_MAX_REAL;
295 608 ii = opts->maxInnerIter;
296
3/4
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 8 times.
✓ Branch 3 taken 8 times.
6864 while (ii-- && !(yLimitGap <= opts->yLimitTol)) {
297 /* recursive bisection to get c such that p(a1) are p(b1) approximately the same */
298 6256 c = (c1+c2) / 2.0;
299 6256 intv[2] = c - halfPlateau;
300 6256 intv[3] = c + halfPlateau;
301
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
6256 PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg));
302
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
6256 PetscCall(FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg));
303 6256 fc = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1);
304
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
6256 if (fc*fc2 < 0.0) {
305 c1 = c;
306 /* fc1 = fc; */
307 } else {
308 3426 c2 = c;
309 3426 fc2 = fc;
310 }
311 6256 yLimitGap = PetscAbsReal(fc);
312 }
313 } else { /* filterType == 1 (or 3 with conversion) for extreme eigenvalues */
314 intv[1] = z1;
315 intv[2] = z1 + (b1-z1)*opts->transIntervalRatio;
316 PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,4,HighLowFlags,baseDeg));
317 PetscCall(FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,4,opts->intervalWeights,polyDeg));
318 }
319 /* polyFilter contains the coefficients of the polynomial filter which approximates phi(x)
320 expanded in the `translated' Chebyshev basis */
321 /* psi(x) = 1.0 - phi(x) is the dual base filter approximated by a polynomial in the form x*p(x) */
322 608 yLeftLimit = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1);
323 608 yRightLimit = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1);
324
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
608 yLimit = (yLeftLimit < yRightLimit) ? yLeftLimit : yRightLimit;
325
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
608 ySummit = (yLeftLimit > yRightLimit) ? yLeftLimit : yRightLimit;
326 x = a1;
327
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
24320 while ((x+=gridSize) < b1) {
328 23712 y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
329
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
23712 if (y < yLimit) yLimit = y;
330
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
23712 if (y > ySummit) ySummit = y;
331 }
332 /* now yLimit is the minimum of x*p(x) for x in [a1, b1] */
333 608 stepLeft = PETSC_FALSE;
334 608 stepRight = PETSC_FALSE;
335
6/6
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 8 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 8 times.
✓ Branch 5 taken 8 times.
608 if ((yLimit < yLeftLimit && yLimit < yRightLimit) || yLimit < opts->yBottomLine) {
336 /* very bad, step to see what will happen */
337 200 stepLeft = PETSC_TRUE;
338
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
200 if (filterInfo->filterType == 2) stepRight = PETSC_TRUE;
339
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
408 } else if (filterInfo->filterType == 2) {
340
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
408 if (yLeftLimit < yRightLimit) {
341
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
230 if (yRightLimit-yLeftLimit > opts->yLimitTol) stepLeft = PETSC_TRUE;
342
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
178 } else if (yLeftLimit-yRightLimit > opts->yLimitTol) stepRight = PETSC_TRUE;
343 }
344
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
608 if (!stepLeft) {
345 yLeftBottom = yLeftLimit;
346 x = a1;
347
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
20372 while ((x-=gridSize) >= a) {
348 20332 y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
349
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
20332 if (y < yLeftBottom) yLeftBottom = y;
350
2/2
✓ Branch 0 taken 7 times.
✓ Branch 1 taken 1 times.
368 else if (y > yLeftBottom) break;
351 }
352 yLeftSummit = yLeftBottom;
353
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
1193388 while ((x-=gridSize) >= a) {
354 1192980 y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
355
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
1192980 if (y > yLeftSummit) {
356 17072 yLeftSummit = y;
357
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
17072 if (yLeftSummit > yLimit*opts->yRippleLimit) {
358 stepLeft = PETSC_TRUE;
359 break;
360 }
361 }
362
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
1192980 if (y < yLeftBottom) yLeftBottom = y;
363 }
364 }
365
3/4
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 8 times.
✓ Branch 3 taken 8 times.
608 if (filterInfo->filterType == 2 && !stepRight) {
366 yRightBottom = yRightLimit;
367 x = b1;
368
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
21666 while ((x+=gridSize) <= b) {
369 21666 y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
370
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
21666 if (y < yRightBottom) yRightBottom = y;
371
2/2
✓ Branch 0 taken 7 times.
✓ Branch 1 taken 1 times.
408 else if (y > yRightBottom) break;
372 }
373 yRightSummit = yRightBottom;
374
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
955330 while ((x+=gridSize) <= b) {
375 954922 y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
376
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
954922 if (y > yRightSummit) {
377 17188 yRightSummit = y;
378
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
17188 if (yRightSummit > yLimit*opts->yRippleLimit) {
379 stepRight = PETSC_TRUE;
380 break;
381 }
382 }
383
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
954922 if (y < yRightBottom) yRightBottom = y;
384 }
385 }
386
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
608 if (!stepLeft && !stepRight) {
387
3/4
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 8 times.
✓ Branch 3 taken 8 times.
408 if (filterInfo->filterType == 2) bottom = PetscMin(yLeftBottom,yRightBottom);
388 else bottom = yLeftBottom;
389 408 qIndex = 1.0 - (yLimit-bottom) / (ySummit-bottom);
390
4/4
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 8 times.
✓ Branch 3 taken 8 times.
408 if (filterInfo->filterOK == 0 || filterInfo->filterQualityIndex < qIndex) {
391 /* found the first OK filter or a better filter */
392
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
1176 for (i=0;i<6;i++) intervals[i] = intv[i];
393 168 filterInfo->filterOK = 1;
394 168 filterInfo->filterQualityIndex = qIndex;
395 168 filterInfo->numIter = numIter;
396 168 filterInfo->yLimit = yLimit;
397 168 filterInfo->ySummit = ySummit;
398 168 filterInfo->numLeftSteps = numLeftSteps;
399 168 filterInfo->yLeftSummit = yLeftSummit;
400 168 filterInfo->yLeftBottom = yLeftBottom;
401
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
168 if (filterInfo->filterType == 2) {
402 168 filterInfo->yLimitGap = yLimitGap;
403 168 filterInfo->numRightSteps = numRightSteps;
404 168 filterInfo->yRightSummit = yRightSummit;
405 168 filterInfo->yRightBottom = yRightBottom;
406 }
407 numMoreLooked = 0;
408
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
240 } else if (++numMoreLooked == numLookMore) {
409 /* filter has been optimal */
410 60 filterInfo->filterOK = 2;
411 60 break;
412 }
413 /* try stepping further to see whether it can improve */
414 348 stepLeft = PETSC_TRUE;
415
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
348 if (filterInfo->filterType == 2) stepRight = PETSC_TRUE;
416 }
417 /* check whether we can really "step" */
418
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
548 if (leftBoundaryMet) {
419
2/4
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 8 times.
✗ Branch 3 not taken.
112 if (filterInfo->filterType == 1 || rightBoundaryMet) break; /* cannot step further, so break the loop */
420
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
112 if (stepLeft) {
421 /* cannot step left, so try stepping right */
422 112 stepLeft = PETSC_FALSE;
423 112 stepRight = PETSC_TRUE;
424 }
425 }
426
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
548 if (rightBoundaryMet && stepRight) {
427 /* cannot step right, so try stepping left */
428 stepRight = PETSC_FALSE;
429 stepLeft = PETSC_TRUE;
430 }
431 /* now "step" */
432
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
548 if (stepLeft) {
433 436 numLeftSteps++;
434
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
436 if (filterInfo->filterType == 2) leftDelta *= opts->shiftStepExpanRate; /* expand the step for faster convergence */
435 436 z1 -= leftDelta;
436 }
437
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
548 if (stepRight) {
438 548 numRightSteps++;
439 548 rightDelta *= opts->shiftStepExpanRate; /* expand the step for faster convergence */
440 548 z2 += rightDelta;
441 }
442
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
548 if (filterInfo->filterType == 2) {
443 /* shrink the "plateau" of the (dual) base filter */
444
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
548 if (stepLeft && stepRight) halfPlateau /= opts->plateauShrinkRate;
445 112 else halfPlateau /= PetscSqrtReal(opts->plateauShrinkRate);
446 }
447 }
448
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
60 PetscCheck(filterInfo->filterOK,PETSC_COMM_SELF,PETSC_ERR_CONV_FAILED,"STFILTER cannot get the filter specified; please adjust your filter parameters (e.g. increasing the polynomial degree)");
449
450 60 filterInfo->totalNumIter = numIter;
451
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
60 PetscCall(PetscFree2(polyFilter,baseFilter));
452
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
14 PetscFunctionReturn(PETSC_SUCCESS);
453 }
454
455 /* ////////////////////////////////////////////////////////////////////////////
456 // Chebyshev Polynomials
457 //////////////////////////////////////////////////////////////////////////// */
458
459 /*
460 FILTLAN function ExpandNewtonPolynomialInChebyshevBasis
461
462 translate the coefficients of a Newton polynomial to the coefficients
463 in a basis of the `translated' (scale-and-shift) Chebyshev polynomials
464
465 input:
466 a Newton polynomial defined by vectors a and x:
467 P(z) = a(1) + a(2)*(z-x(1)) + a(3)*(z-x(1))*(z-x(2)) + ... + a(n)*(z-x(1))*...*(z-x(n-1))
468 the interval [aa,bb] defines the `translated' Chebyshev polynomials S_i(z) = T_i((z-c)/h),
469 where c=(aa+bb)/2 and h=(bb-aa)/2, and T_i is the Chebyshev polynomial of the first kind
470 note that T_i is defined by T_0(z)=1, T_1(z)=z, and T_i(z)=2*z*T_{i-1}(z)+T_{i-2}(z) for i>=2
471
472 output:
473 a vector q containing the Chebyshev coefficients:
474 P(z) = q(1)*S_0(z) + q(2)*S_1(z) + ... + q(n)*S_{n-1}(z)
475 */
476 15064 static inline PetscErrorCode FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(PetscInt n,PetscReal aa,PetscReal bb,PetscReal *a,PetscReal *x,PetscReal *q,PetscReal *q2)
477 {
478 15064 PetscInt m,mm;
479 15064 PetscReal *sa,*sx,*sq,*sq2,c,c2,h,h2;
480
481
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
15064 PetscFunctionBegin;
482 15064 sa = a+n; /* pointers for traversing a and x */
483 15064 sx = x+n-1;
484 15064 *q = *--sa; /* set q[0] = a(n) */
485
486 15064 c = (aa+bb)/2.0;
487 15064 h = (bb-aa)/2.0;
488 15064 h2 = h/2.0;
489
490
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
331408 for (m=1;m<=n-1;m++) { /* the main loop for translation */
491
492 /* compute q2[0:m-1] = (c-x[n-m-1])*q[0:m-1] */
493 316344 mm = m;
494 316344 sq = q;
495 316344 sq2 = q2;
496 316344 c2 = c-(*--sx);
497
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
3796128 while (mm--) *(sq2++) = c2*(*sq++);
498 316344 *sq2 = 0.0; /* q2[m] = 0.0 */
499 316344 *(q2+1) += h*(*q); /* q2[1] = q2[1] + h*q[0] */
500
501 /* compute q2[0:m-2] = q2[0:m-2] + h2*q[1:m-1] */
502 316344 mm = m-1;
503 316344 sq2 = q2;
504 316344 sq = q+1;
505
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
3479784 while (mm--) *(sq2++) += h2*(*sq++);
506
507 /* compute q2[2:m] = q2[2:m] + h2*q[1:m-1] */
508 316344 mm = m-1;
509 316344 sq2 = q2+2;
510 316344 sq = q+1;
511
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
3479784 while (mm--) *(sq2++) += h2*(*sq++);
512
513 /* compute q[0:m] = q2[0:m] */
514 316344 mm = m+1;
515 316344 sq2 = q2;
516 316344 sq = q;
517
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
4112472 while (mm--) *sq++ = *sq2++;
518 316344 *q += (*--sa); /* q[0] = q[0] + p[n-m-1] */
519 }
520
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
15064 PetscFunctionReturn(PETSC_SUCCESS);
521 }
522
523 /*
524 FILTLAN function PolynomialEvaluationInChebyshevBasis
525
526 evaluate P(z) at z=z0, where P(z) is a polynomial expanded in a basis of
527 the `translated' (i.e. scale-and-shift) Chebyshev polynomials
528
529 input:
530 c is a vector of Chebyshev coefficients which defines the polynomial
531 P(z) = c(1)*S_0(z) + c(2)*S_1(z) + ... + c(n)*S_{n-1}(z),
532 where S_i is the `translated' Chebyshev polynomial S_i((z-c)/h) = T_i(z), with
533 c = (intv(j)+intv(j+1)) / 2, h = (intv(j+1)-intv(j)) / 2
534 note that T_i(z) is the Chebyshev polynomial of the first kind,
535 T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
536
537 output:
538 the evaluated value of P(z) at z=z0
539 */
540 2228556 static inline PetscReal FILTLAN_PolynomialEvaluationInChebyshevBasis(PetscReal *pp,PetscInt m,PetscInt idx,PetscReal z0,PetscReal aa,PetscReal bb)
541 {
542 2228556 PetscInt ii,deg1;
543 2228556 PetscReal y,zz,t0,t1,t2,*sc;
544
545
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
2228556 PetscFunctionBegin;
546 2228556 deg1 = m;
547
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
2228556 if (aa==-1.0 && bb==1.0) zz = z0; /* treat it as a special case to reduce rounding errors */
548
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
2228556 else zz = (aa==bb)? 0.0 : -1.0+2.0*(z0-aa)/(bb-aa);
549
550 /* compute y = P(z0), where we utilize the Chebyshev recursion */
551 2228556 sc = pp+(idx-1)*m; /* sc points to column idx of pp */
552 2228556 y = *sc++;
553 2228556 t1 = 1.0; t2 = zz;
554 2228556 ii = deg1-1;
555
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
441314312 while (ii--) {
556 /* Chebyshev recursion: T_0(zz)=1, T_1(zz)=zz, and T_{i+1}(zz) = 2*zz*T_i(zz) + T_{i-1}(zz) for i>=2
557 the values of T_{i+1}(zz), T_i(zz), T_{i-1}(zz) are stored in t0, t1, t2, respectively */
558 439085756 t0 = 2*zz*t1 - t2;
559 /* it also works for the base case / the first iteration, where t0 equals 2*zz*1-zz == zz which is T_1(zz) */
560 439085756 t2 = t1;
561 439085756 t1 = t0;
562 439085756 y += (*sc++)*t0;
563 }
564
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
2228556 PetscFunctionReturn(y);
565 }
566
567 #define basisTranslated PETSC_TRUE
568 /*
569 FILTLAN function PiecewisePolynomialEvaluationInChebyshevBasis
570
571 evaluate P(z) at z=z0, where P(z) is a piecewise polynomial expanded
572 in a basis of the (optionally translated, i.e. scale-and-shift) Chebyshev polynomials for each interval
573
574 input:
575 intv is a vector which defines the intervals; the j-th interval is [intv(j), intv(j+1))
576 pp is a matrix of Chebyshev coefficients which defines a piecewise polynomial P(z)
577 in a basis of the `translated' Chebyshev polynomials in each interval
578 the polynomial P_j(z) in the j-th interval, i.e. when z is in [intv(j), intv(j+1)), is defined by the j-th column of pp:
579 if basisTranslated == false, then
580 P_j(z) = pp(1,j)*T_0(z) + pp(2,j)*T_1(z) + ... + pp(n,j)*T_{n-1}(z),
581 where T_i(z) is the Chebyshev polynomial of the first kind,
582 T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
583 if basisTranslated == true, then
584 P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
585 where S_i is the `translated' Chebyshev polynomial S_i((z-c)/h) = T_i(z), with
586 c = (intv(j)+intv(j+1)) / 2, h = (intv(j+1)-intv(j)) / 2
587
588 output:
589 the evaluated value of P(z) at z=z0
590
591 note that if z0 falls below the first interval, then the polynomial in the first interval will be used to evaluate P(z0)
592 if z0 flies over the last interval, then the polynomial in the last interval will be used to evaluate P(z0)
593 */
594 2228556 static inline PetscReal FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(PetscReal *pp,PetscInt m,PetscReal *intv,PetscInt npoints,PetscReal z0)
595 {
596 2228556 PetscReal *sintv,aa,bb,resul;
597 2228556 PetscInt idx;
598
599
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
2228556 PetscFunctionBegin;
600 /* determine the interval which contains z0 */
601 2228556 sintv = &intv[1];
602 2228556 idx = 1;
603
3/4
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 8 times.
✓ Branch 3 taken 8 times.
2228556 if (npoints>2 && z0 >= *sintv) {
604 1020572 sintv++;
605
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
3980064 while (++idx < npoints-1) {
606
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
3010956 if (z0 < *sintv) break;
607 2959492 sintv++;
608 }
609 }
610 /* idx==1 if npoints<=2; otherwise idx satisfies:
611 intv(idx) <= z0 < intv(idx+1), if 2 <= idx <= npoints-2
612 z0 < intv(idx+1), if idx == 1
613 intv(idx) <= z0, if idx == npoints-1
614 in addition, sintv points to &intv(idx+1) */
615 2228556 if (basisTranslated) {
616 /* the basis consists of `translated' Chebyshev polynomials */
617 /* find the interval of concern, [aa,bb] */
618 2228556 aa = *(sintv-1);
619 2228556 bb = *sintv;
620 2228556 resul = FILTLAN_PolynomialEvaluationInChebyshevBasis(pp,m,idx,z0,aa,bb);
621 } else {
622 /* the basis consists of standard Chebyshev polynomials, with interval [-1.0,1.0] for integration */
623 resul = FILTLAN_PolynomialEvaluationInChebyshevBasis(pp,m,idx,z0,-1.0,1.0);
624 }
625
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
2228556 PetscFunctionReturn(resul);
626 }
627
628 /*
629 FILTLAN function PiecewisePolynomialInnerProductInChebyshevBasis
630
631 compute the weighted inner product of two piecewise polynomials expanded
632 in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials for each interval
633
634 pp and qq are two matrices of Chebyshev coefficients which define the piecewise polynomials P(z) and Q(z), respectively
635 for z in the j-th interval, P(z) equals
636 P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
637 and Q(z) equals
638 Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(n,j)*S_{n-1}(z),
639 where S_i(z) is the `translated' Chebyshev polynomial in that interval,
640 S_i((z-c)/h) = T_i(z), c = (aa+bb)) / 2, h = (bb-aa) / 2,
641 with T_i(z) the Chebyshev polynomial of the first kind
642 T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
643
644 the (scaled) j-th interval inner product is defined by
645 <P_j,Q_j> = (Pi/2)*(pp(1,j)*qq(1,j) + sum_{k} pp(k,j)*qq(k,j)),
646 which comes from the property
647 <T_0,T_0>=pi, <T_i,T_i>=pi/2 for i>=1, and <T_i,T_j>=0 for i!=j
648
649 the weighted inner product is <P,Q> = sum_{j} intervalWeights(j)*<P_j,Q_j>,
650 which is the return value
651
652 note that for unit weights, pass an empty vector of intervalWeights (i.e. of length 0)
653 */
654 4877355 static inline PetscReal FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(PetscReal *pp,PetscInt prows,PetscInt pcols,PetscInt ldp,PetscReal *qq,PetscInt qrows,PetscInt qcols,PetscInt ldq,const PetscReal *intervalWeights)
655 {
656 4877355 PetscInt nintv,deg1,i,k;
657 4877355 PetscReal *sp,*sq,ans=0.0,ans2;
658
659
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
4877355 PetscFunctionBegin;
660 4877355 deg1 = PetscMin(prows,qrows); /* number of effective coefficients, one more than the effective polynomial degree */
661
2/14
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
4877355 if (PetscUnlikely(!deg1)) PetscFunctionReturn(0.0);
662 4877355 nintv = PetscMin(pcols,qcols); /* number of intervals */
663
664 /* scaled by intervalWeights(i) in the i-th interval (we assume intervalWeights[] are always provided).
665 compute ans = sum_{i=1,...,nintv} intervalWeights(i)*[ pp(1,i)*qq(1,i) + sum_{k=1,...,deg} pp(k,i)*qq(k,i) ] */
666
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
29264130 for (i=0;i<nintv;i++) { /* compute ans2 = pp(1,i)*qq(1,i) + sum_{k=1,...,deg} pp(k,i)*qq(k,i) */
667 24386775 sp = pp+i*ldp;
668 24386775 sq = qq+i*ldq;
669 24386775 ans2 = (*sp) * (*sq); /* the first term pp(1,i)*qq(1,i) is being added twice */
670
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
1422099675 for (k=0;k<deg1;k++) ans2 += (*sp++)*(*sq++); /* add pp(k,i)*qq(k,i) */
671 24386775 ans += ans2*intervalWeights[i]; /* compute ans += ans2*intervalWeights(i) */
672 }
673
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
4877355 PetscFunctionReturn(ans*PETSC_PI/2.0);
674 }
675
676 /*
677 FILTLAN function PiecewisePolynomialInChebyshevBasisMultiplyX
678
679 compute Q(z) = z*P(z), where P(z) and Q(z) are piecewise polynomials expanded
680 in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials for each interval
681
682 P(z) and Q(z) are stored as matrices of Chebyshev coefficients pp and qq, respectively
683
684 For z in the j-th interval, P(z) equals
685 P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
686 and Q(z) equals
687 Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(n,j)*S_{n-1}(z),
688 where S_i(z) is the `translated' Chebyshev polynomial in that interval,
689 S_i((z-c)/h) = T_i(z), c = (intv(j)+intv(j+1))) / 2, h = (intv(j+1)-intv(j)) / 2,
690 with T_i(z) the Chebyshev polynomial of the first kind
691 T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
692
693 the returned matrix is qq which represents Q(z) = z*P(z)
694 */
695 1628955 static inline PetscErrorCode FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(PetscReal *pp,PetscInt deg1,PetscInt ldp,PetscReal *intv,PetscInt nintv,PetscReal *qq,PetscInt ldq)
696 {
697 1628955 PetscInt i,j;
698 1628955 PetscReal c,h,h2,tmp,*sp,*sq,*sp2,*sq2;
699
700
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
1628955 PetscFunctionBegin;
701
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
9773730 for (j=0;j<nintv;j++) { /* consider interval between intv(j) and intv(j+1) */
702 8144775 sp = pp+j*ldp;
703 8144775 sq = qq+j*ldq;
704 8144775 sp2 = sp;
705 8144775 sq2 = sq+1;
706 8144775 c = 0.5*(intv[j] + intv[j+1]); /* compute c = (intv(j) + intv(j+1))/2 */
707 8144775 h = 0.5*(intv[j+1] - intv[j]); /* compute h = (intv(j+1) - intv(j))/2 and h2 = h/2 */
708 8144775 h2 = 0.5*h;
709 8144775 i = deg1;
710
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
620958825 while (i--) *sq++ = c*(*sp++); /* compute q(1:deg1,j) = c*p(1:deg1,j) */
711 8144775 *sq++ = 0.0; /* set q(deg1+1,j) = 0.0 */
712 8144775 *(sq2++) += h*(*sp2++); /* compute q(2,j) = q(2,j) + h*p(1,j) */
713 8144775 i = deg1-1;
714
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
612814050 while (i--) { /* compute q(3:deg1+1,j) += h2*p(2:deg1,j) and then q(1:deg1-1,j) += h2*p(2:deg1,j) */
715 604669275 tmp = h2*(*sp2++);
716 604669275 *(sq2-2) += tmp;
717 604669275 *(sq2++) += tmp;
718 }
719 }
720
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
1628955 PetscFunctionReturn(PETSC_SUCCESS);
721 }
722
723 /* ////////////////////////////////////////////////////////////////////////////
724 // Conjugate Residual Method in the Polynomial Space
725 //////////////////////////////////////////////////////////////////////////// */
726
727 /*
728 B := alpha*A + beta*B
729
730 A,B are nxk
731 */
732 6481856 static inline PetscErrorCode Mat_AXPY_BLAS(PetscInt n,PetscInt k,PetscReal alpha,const PetscReal *A,PetscInt lda,PetscReal beta,PetscReal *B,PetscInt ldb)
733 {
734 6481856 PetscInt i,j;
735
736
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
6481856 PetscFunctionBegin;
737
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
6481856 if (beta==(PetscReal)1.0) {
738
4/4
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 8 times.
✓ Branch 3 taken 8 times.
1255195400 for (j=0;j<k;j++) for (i=0;i<n;i++) B[i+j*ldb] += alpha*A[i+j*lda];
739
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
3248400 PetscCall(PetscLogFlops(2.0*n*k));
740 } else {
741
4/4
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 8 times.
✓ Branch 3 taken 8 times.
1252867936 for (j=0;j<k;j++) for (i=0;i<n;i++) B[i+j*ldb] = alpha*A[i+j*lda] + beta*B[i+j*ldb];
742
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
3233456 PetscCall(PetscLogFlops(3.0*n*k));
743 }
744
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
6481856 PetscFunctionReturn(PETSC_SUCCESS);
745 }
746
747 /*
748 FILTLAN function FilteredConjugateResidualPolynomial
749
750 ** Conjugate Residual Method in the Polynomial Space
751
752 this routine employs a conjugate-residual-type algorithm in polynomial space to minimize ||P(z)-Q(z)||_w,
753 where P(z), the base filter, is the input piecewise polynomial, and
754 Q(z) is the output polynomial satisfying Q(0)==1, i.e. the constant term of Q(z) is 1
755 niter is the number of conjugate-residual iterations; therefore, the degree of Q(z) is up to niter+1
756 both P(z) and Q(z) are expanded in the `translated' (scale-and-shift) Chebyshev basis for each interval,
757 and presented as matrices of Chebyshev coefficients, denoted by pp and qq, respectively
758
759 input:
760 intv is a vector which defines the intervals; the j-th interval is [intv(j),intv(j+1))
761 w is a vector of Chebyshev weights; the weight of j-th interval is w(j)
762 the interval weights define the inner product of two continuous functions and then
763 the derived w-norm ||P(z)-Q(z)||_w
764 pp is a matrix of Chebyshev coefficients which defines the piecewise polynomial P(z)
765 to be specific, for z in [intv(j), intv(j+1)), P(z) equals
766 P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(niter+2,j)*S_{niter+1}(z),
767 where S_i(z) is the `translated' Chebyshev polynomial in that interval,
768 S_i((z-c)/h) = T_i(z), c = (intv(j)+intv(j+1))) / 2, h = (intv(j+1)-intv(j)) / 2,
769 with T_i(z) the Chebyshev polynomial of the first kind,
770 T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
771
772 output:
773 the return matrix, denoted by qq, represents a polynomial Q(z) with degree up to 1+niter
774 and satisfying Q(0)==1, such that ||P(z))-Q(z)||_w is minimized
775 this polynomial Q(z) is expanded in the `translated' Chebyshev basis for each interval
776 to be precise, considering z in [intv(j), intv(j+1)), Q(z) equals
777 Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(niter+2,j)*S_{niter+1}(z)
778
779 note:
780 1. since Q(0)==1, P(0)==1 is expected; if P(0)!=1, one can translate P(z)
781 for example, if P(0)==0, one can use 1-P(z) as input instead of P(z)
782 2. typically, the base filter, defined by pp and intv, is from Hermite interpolation
783 in intervals [intv(j),intv(j+1)) for j=1,...,nintv, with nintv the number of intervals
784 */
785 7472 static PetscErrorCode FILTLAN_FilteredConjugateResidualPolynomial(PetscReal *cpol,PetscReal *baseFilter,PetscInt nbase,PetscReal *intv,PetscInt m,PetscReal *intervalWeights,PetscInt niter)
786 {
787 7472 PetscInt i,j,srpol,scpol,sarpol,sppol,sappol,ld,nintv;
788 7472 PetscReal rho,rho0,rho1,den,bet,alp,alp0,*ppol,*rpol,*appol,*arpol;
789
790
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
7472 PetscFunctionBegin;
791 7472 nintv = m-1;
792 7472 ld = niter+2; /* leading dimension */
793
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
7472 PetscCall(PetscCalloc4(ld*nintv,&ppol,ld*nintv,&rpol,ld*nintv,&appol,ld*nintv,&arpol));
794
4/6
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 3 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
7472 PetscCall(PetscArrayzero(cpol,ld*nintv));
795 /* initialize polynomial ppol to be 1 (i.e. multiplicative identity) in all intervals */
796 44832 sppol = 2;
797 44832 srpol = 2;
798 44832 scpol = 2;
799
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
44832 for (j=0;j<nintv;j++) {
800 37360 ppol[j*ld] = 1.0;
801 37360 rpol[j*ld] = 1.0;
802 37360 cpol[j*ld] = 1.0;
803 }
804 /* ppol is the initial p-polynomial (corresponding to the A-conjugate vector p in CG)
805 rpol is the r-polynomial (corresponding to the residual vector r in CG)
806 cpol is the "corrected" residual polynomial (result of this function) */
807 7472 sappol = 3;
808 7472 sarpol = 3;
809
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
7472 PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(ppol,sppol,ld,intv,nintv,appol,ld));
810
4/4
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 8 times.
✓ Branch 3 taken 8 times.
141968 for (i=0;i<3;i++) for (j=0;j<nintv;j++) arpol[i+j*ld] = appol[i+j*ld];
811 7472 rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
812
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
1018958 for (i=0;i<niter;i++) {
813 1017200 den = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(appol,sappol,nintv,ld,appol,sappol,nintv,ld,intervalWeights);
814 1017200 alp0 = rho/den;
815 1017200 rho1 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(baseFilter,nbase,nintv,nbase,appol,sappol,nintv,ld,intervalWeights);
816 1017200 alp = (rho-rho1)/den;
817 1017200 srpol++;
818 1017200 scpol++;
819
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
1017200 PetscCall(Mat_AXPY_BLAS(srpol,nintv,-alp0,appol,ld,1.0,rpol,ld));
820
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
1017200 PetscCall(Mat_AXPY_BLAS(scpol,nintv,-alp,appol,ld,1.0,cpol,ld));
821
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
1017200 if (i+1 == niter) break;
822 1009728 sarpol++;
823
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
1009728 PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(rpol,srpol,ld,intv,nintv,arpol,ld));
824 1009728 rho0 = rho;
825 1009728 rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
826 1009728 bet = rho / rho0;
827 1009728 sppol++;
828 1009728 sappol++;
829
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
1009728 PetscCall(Mat_AXPY_BLAS(sppol,nintv,1.0,rpol,ld,bet,ppol,ld));
830
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
1009728 PetscCall(Mat_AXPY_BLAS(sappol,nintv,1.0,arpol,ld,bet,appol,ld));
831 }
832
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
7472 PetscCall(PetscFree4(ppol,rpol,appol,arpol));
833
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
1758 PetscFunctionReturn(PETSC_SUCCESS);
834 }
835
836 /*
837 FILTLAN function FilteredConjugateResidualMatrixPolynomialVectorProduct
838
839 this routine employs a conjugate-residual-type algorithm in polynomial space to compute
840 x = x0 + s(A)*r0 with r0 = b - A*x0, such that ||(1-P(z))-z*s(z)||_w is minimized, where
841 P(z) is a given piecewise polynomial, called the base filter,
842 s(z) is a polynomial of degree up to niter, the number of conjugate-residual iterations,
843 and b and x0 are given vectors
844
845 note that P(z) is expanded in the `translated' (scale-and-shift) Chebyshev basis for each interval,
846 and presented as a matrix of Chebyshev coefficients pp
847
848 input:
849 A is a sparse matrix
850 x0, b are vectors
851 niter is the number of conjugate-residual iterations
852 intv is a vector which defines the intervals; the j-th interval is [intv(j),intv(j+1))
853 w is a vector of Chebyshev weights; the weight of j-th interval is w(j)
854 the interval weights define the inner product of two continuous functions and then
855 the derived w-norm ||P(z)-Q(z)||_w
856 pp is a matrix of Chebyshev coefficients which defines the piecewise polynomial P(z)
857 to be specific, for z in [intv(j), intv(j+1)), P(z) equals
858 P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(niter+2,j)*S_{niter+1}(z),
859 where S_i(z) is the `translated' Chebyshev polynomial in that interval,
860 S_i((z-c)/h) = T_i(z), c = (intv(j)+intv(j+1))) / 2, h = (intv(j+1)-intv(j)) / 2,
861 with T_i(z) the Chebyshev polynomial of the first kind,
862 T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
863 tol is the tolerance; if the residual polynomial in z-norm is dropped by a factor lower
864 than tol, then stop the conjugate-residual iteration
865
866 output:
867 the return vector is x = x0 + s(A)*r0 with r0 = b - A*x0, such that ||(1-P(z))-z*s(z)||_w is minimized,
868 subject to that s(z) is a polynomial of degree up to niter, where P(z) is the base filter
869 in short, z*s(z) approximates 1-P(z)
870
871 note:
872 1. since z*s(z) approximates 1-P(z), P(0)==1 is expected; if P(0)!=1, one can translate P(z)
873 for example, if P(0)==0, one can use 1-P(z) as input instead of P(z)
874 2. typically, the base filter, defined by pp and intv, is from Hermite interpolation
875 in intervals [intv(j),intv(j+1)) for j=1,...,nintv, with nintv the number of intervals
876 3. a common application is to compute R(A)*b, where R(z) approximates 1-P(z)
877 in this case, one can set x0 = 0 and then the return vector is x = s(A)*b, where
878 z*s(z) approximates 1-P(z); therefore, A*x is the wanted R(A)*b
879 */
880 4500 static PetscErrorCode FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProduct(Mat A,Vec b,Vec x,PetscReal *baseFilter,PetscInt nbase,PetscReal *intv,PetscInt nintv,PetscReal *intervalWeights,PetscInt niter,Vec *work)
881 {
882 4500 PetscInt i,j,srpol,scpol,sarpol,sppol,sappol,ld;
883 4500 PetscReal rho,rho0,rho00,rho1,den,bet,alp,alp0,*cpol,*ppol,*rpol,*appol,*arpol,tol=0.0;
884 4500 Vec r=work[0],p=work[1],ap=work[2],w=work[3];
885 4500 PetscScalar alpha;
886
887
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
4500 PetscFunctionBegin;
888 4500 ld = niter+3; /* leading dimension */
889
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
4500 PetscCall(PetscCalloc5(ld*nintv,&ppol,ld*nintv,&rpol,ld*nintv,&cpol,ld*nintv,&appol,ld*nintv,&arpol));
890 /* initialize polynomial ppol to be 1 (i.e. multiplicative identity) in all intervals */
891 27000 sppol = 2;
892 27000 srpol = 2;
893 27000 scpol = 2;
894
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
27000 for (j=0;j<nintv;j++) {
895 22500 ppol[j*ld] = 1.0;
896 22500 rpol[j*ld] = 1.0;
897 22500 cpol[j*ld] = 1.0;
898 }
899 /* ppol is the initial p-polynomial (corresponding to the A-conjugate vector p in CG)
900 rpol is the r-polynomial (corresponding to the residual vector r in CG)
901 cpol is the "corrected" residual polynomial */
902 4500 sappol = 3;
903 4500 sarpol = 3;
904
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
4500 PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(ppol,sppol,ld,intv,nintv,appol,ld));
905
4/4
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 8 times.
✓ Branch 3 taken 8 times.
85500 for (i=0;i<3;i++) for (j=0;j<nintv;j++) arpol[i+j*ld] = appol[i+j*ld];
906 4500 rho00 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
907 4500 rho = rho00;
908
909 /* corrected CR in vector space */
910 /* we assume x0 is always 0 */
911
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
4500 PetscCall(VecSet(x,0.0));
912
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
4500 PetscCall(VecCopy(b,r)); /* initial residual r = b-A*x0 */
913
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
4500 PetscCall(VecCopy(r,p)); /* p = r */
914
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
4500 PetscCall(MatMult(A,p,ap)); /* ap = A*p */
915
916
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
560500 for (i=0;i<niter;i++) {
917 /* iteration in the polynomial space */
918 556000 den = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(appol,sappol,nintv,ld,appol,sappol,nintv,ld,intervalWeights);
919 556000 alp0 = rho/den;
920 556000 rho1 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(baseFilter,nbase,nintv,nbase,appol,sappol,nintv,ld,intervalWeights);
921 556000 alp = (rho-rho1)/den;
922 556000 srpol++;
923 556000 scpol++;
924
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
556000 PetscCall(Mat_AXPY_BLAS(srpol,nintv,-alp0,appol,ld,1.0,rpol,ld));
925
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
556000 PetscCall(Mat_AXPY_BLAS(scpol,nintv,-alp,appol,ld,1.0,cpol,ld));
926 556000 sarpol++;
927
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
556000 PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(rpol,srpol,ld,intv,nintv,arpol,ld));
928 556000 rho0 = rho;
929 556000 rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
930
931 /* update x in the vector space */
932 556000 alpha = alp;
933
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
556000 PetscCall(VecAXPY(x,alpha,p)); /* x += alp*p */
934
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
556000 if (rho < tol*rho00) break;
935
936 /* finish the iteration in the polynomial space */
937 556000 bet = rho / rho0;
938 556000 sppol++;
939 556000 sappol++;
940
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
556000 PetscCall(Mat_AXPY_BLAS(sppol,nintv,1.0,rpol,ld,bet,ppol,ld));
941
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
556000 PetscCall(Mat_AXPY_BLAS(sappol,nintv,1.0,arpol,ld,bet,appol,ld));
942
943 /* finish the iteration in the vector space */
944 556000 alpha = -alp0;
945
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
556000 PetscCall(VecAXPY(r,alpha,ap)); /* r -= alp0*ap */
946 556000 alpha = bet;
947
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
556000 PetscCall(VecAYPX(p,alpha,r)); /* p = r + bet*p */
948
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
556000 PetscCall(MatMult(A,r,w)); /* ap = A*r + bet*ap */
949
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
556000 PetscCall(VecAYPX(ap,alpha,w));
950 }
951
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
4500 PetscCall(PetscFree5(ppol,rpol,cpol,appol,arpol));
952
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
1084 PetscFunctionReturn(PETSC_SUCCESS);
953 }
954
955 /*
956 Gateway to FILTLAN for evaluating y=p(A)*x
957 */
958 4500 static PetscErrorCode MatMult_FILTLAN(Mat A,Vec x,Vec y)
959 {
960 4500 ST st;
961 4500 ST_FILTER *ctx;
962 4500 FILTLAN_CTX filtlan;
963 4500 PetscInt npoints;
964
965
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
4500 PetscFunctionBegin;
966
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
4500 PetscCall(MatShellGetContext(A,&st));
967 4500 ctx = (ST_FILTER*)st->data;
968 4500 filtlan = (FILTLAN_CTX)ctx->data;
969
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
4500 npoints = (filtlan->filterInfo->filterType == 2)? 6: 4;
970
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
4500 PetscCall(FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProduct(ctx->T,x,y,filtlan->baseFilter,2*filtlan->baseDegree+2,filtlan->intervals,npoints-1,filtlan->opts->intervalWeights,ctx->polyDegree,st->work));
971
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
4500 PetscCall(VecCopy(y,st->work[0]));
972
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
4500 PetscCall(MatMult(ctx->T,st->work[0],y));
973
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
1084 PetscFunctionReturn(PETSC_SUCCESS);
974 }
975
976 /* Block version of FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProduct */
977 255 static PetscErrorCode FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProductBlock(Mat A,Mat B,Mat C,PetscReal *baseFilter,PetscInt nbase,PetscReal *intv,PetscInt nintv,PetscReal *intervalWeights,PetscInt niter,Vec *work,Mat R,Mat P,Mat AP,Mat W)
978 {
979 255 PetscInt i,j,srpol,scpol,sarpol,sppol,sappol,ld;
980 255 PetscReal rho,rho0,rho00,rho1,den,bet,alp,alp0,*cpol,*ppol,*rpol,*appol,*arpol,tol=0.0;
981 255 PetscScalar alpha;
982
983
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
255 PetscFunctionBegin;
984 255 ld = niter+3; /* leading dimension */
985
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
255 PetscCall(PetscCalloc5(ld*nintv,&ppol,ld*nintv,&rpol,ld*nintv,&cpol,ld*nintv,&appol,ld*nintv,&arpol));
986 /* initialize polynomial ppol to be 1 (i.e. multiplicative identity) in all intervals */
987 1530 sppol = 2;
988 1530 srpol = 2;
989 1530 scpol = 2;
990
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
1530 for (j=0;j<nintv;j++) {
991 1275 ppol[j*ld] = 1.0;
992 1275 rpol[j*ld] = 1.0;
993 1275 cpol[j*ld] = 1.0;
994 }
995 /* ppol is the initial p-polynomial (corresponding to the A-conjugate vector p in CG)
996 rpol is the r-polynomial (corresponding to the residual vector r in CG)
997 cpol is the "corrected" residual polynomial */
998 255 sappol = 3;
999 255 sarpol = 3;
1000
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
255 PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(ppol,sppol,ld,intv,nintv,appol,ld));
1001
4/4
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 8 times.
✓ Branch 3 taken 8 times.
4845 for (i=0;i<3;i++) for (j=0;j<nintv;j++) arpol[i+j*ld] = appol[i+j*ld];
1002 255 rho00 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
1003 255 rho = rho00;
1004
1005 /* corrected CR in vector space */
1006 /* we assume x0 is always 0 */
1007
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
255 PetscCall(MatZeroEntries(C));
1008
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
255 PetscCall(MatCopy(B,R,SAME_NONZERO_PATTERN)); /* initial residual r = b-A*x0 */
1009
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
255 PetscCall(MatCopy(R,P,SAME_NONZERO_PATTERN)); /* p = r */
1010
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
255 PetscCall(MatMatMult(A,P,MAT_REUSE_MATRIX,PETSC_DEFAULT,&AP)); /* ap = A*p */
1011
1012
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
51255 for (i=0;i<niter;i++) {
1013 /* iteration in the polynomial space */
1014 51000 den = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(appol,sappol,nintv,ld,appol,sappol,nintv,ld,intervalWeights);
1015 51000 alp0 = rho/den;
1016 51000 rho1 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(baseFilter,nbase,nintv,nbase,appol,sappol,nintv,ld,intervalWeights);
1017 51000 alp = (rho-rho1)/den;
1018 51000 srpol++;
1019 51000 scpol++;
1020
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
51000 PetscCall(Mat_AXPY_BLAS(srpol,nintv,-alp0,appol,ld,1.0,rpol,ld));
1021
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
51000 PetscCall(Mat_AXPY_BLAS(scpol,nintv,-alp,appol,ld,1.0,cpol,ld));
1022 51000 sarpol++;
1023
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
51000 PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(rpol,srpol,ld,intv,nintv,arpol,ld));
1024 51000 rho0 = rho;
1025 51000 rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
1026
1027 /* update x in the vector space */
1028 51000 alpha = alp;
1029
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
51000 PetscCall(MatAXPY(C,alpha,P,SAME_NONZERO_PATTERN)); /* x += alp*p */
1030
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
51000 if (rho < tol*rho00) break;
1031
1032 /* finish the iteration in the polynomial space */
1033 51000 bet = rho / rho0;
1034 51000 sppol++;
1035 51000 sappol++;
1036
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
51000 PetscCall(Mat_AXPY_BLAS(sppol,nintv,1.0,rpol,ld,bet,ppol,ld));
1037
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
51000 PetscCall(Mat_AXPY_BLAS(sappol,nintv,1.0,arpol,ld,bet,appol,ld));
1038
1039 /* finish the iteration in the vector space */
1040 51000 alpha = -alp0;
1041
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
51000 PetscCall(MatAXPY(R,alpha,AP,SAME_NONZERO_PATTERN)); /* r -= alp0*ap */
1042 51000 alpha = bet;
1043
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
51000 PetscCall(MatAYPX(P,alpha,R,SAME_NONZERO_PATTERN)); /* p = r + bet*p */
1044
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
51000 PetscCall(MatMatMult(A,R,MAT_REUSE_MATRIX,PETSC_DEFAULT,&W)); /* ap = A*r + bet*ap */
1045
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
51000 PetscCall(MatAYPX(AP,alpha,W,SAME_NONZERO_PATTERN));
1046 }
1047
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
255 PetscCall(PetscFree5(ppol,rpol,cpol,appol,arpol));
1048
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
51 PetscFunctionReturn(PETSC_SUCCESS);
1049 }
1050
1051 /*
1052 Gateway to FILTLAN for evaluating C=p(A)*B
1053 */
1054 255 static PetscErrorCode MatMatMult_FILTLAN(Mat A,Mat B,Mat C,void *pctx)
1055 {
1056 255 ST st;
1057 255 ST_FILTER *ctx;
1058 255 FILTLAN_CTX filtlan;
1059 255 PetscInt i,m1,m2,npoints;
1060
1061
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
255 PetscFunctionBegin;
1062
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
255 PetscCall(MatShellGetContext(A,&st));
1063 255 ctx = (ST_FILTER*)st->data;
1064 255 filtlan = (FILTLAN_CTX)ctx->data;
1065
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
255 npoints = (filtlan->filterInfo->filterType == 2)? 6: 4;
1066
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
255 if (ctx->nW) { /* check if work matrices must be resized */
1067
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
245 PetscCall(MatGetSize(B,NULL,&m1));
1068
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
245 PetscCall(MatGetSize(ctx->W[0],NULL,&m2));
1069
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
245 if (m1!=m2) {
1070
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
30 PetscCall(MatDestroyMatrices(ctx->nW,&ctx->W));
1071 30 ctx->nW = 0;
1072 }
1073 }
1074
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
255 if (!ctx->nW) { /* allocate work matrices */
1075 40 ctx->nW = 4;
1076
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
40 PetscCall(PetscMalloc1(ctx->nW,&ctx->W));
1077
7/8
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 8 times.
✓ Branch 3 taken 6 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 2 times.
200 for (i=0;i<ctx->nW;i++) PetscCall(MatDuplicate(B,MAT_DO_NOT_COPY_VALUES,&ctx->W[i]));
1078 }
1079
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
255 PetscCall(FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProductBlock(ctx->T,B,ctx->W[0],filtlan->baseFilter,2*filtlan->baseDegree+2,filtlan->intervals,npoints-1,filtlan->opts->intervalWeights,ctx->polyDegree,st->work,C,ctx->W[1],ctx->W[2],ctx->W[3]));
1080
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
255 PetscCall(MatMatMult(ctx->T,ctx->W[0],MAT_REUSE_MATRIX,PETSC_DEFAULT,&C));
1081
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
51 PetscFunctionReturn(PETSC_SUCCESS);
1082 }
1083
1084 /*
1085 FILTLAN function PolynomialFilterInterface::setFilter
1086
1087 Creates the shifted (and scaled) matrix and the base filter P(z).
1088 M is a shell matrix whose MatMult() applies the filter.
1089 */
1090 68 static PetscErrorCode STComputeOperator_Filter_FILTLAN(ST st,Mat *G)
1091 {
1092 68 ST_FILTER *ctx = (ST_FILTER*)st->data;
1093 68 FILTLAN_CTX filtlan = (FILTLAN_CTX)ctx->data;
1094 68 PetscInt i,npoints,n,m,N,M;
1095 68 PetscReal frame2[4];
1096 68 PetscScalar alpha;
1097 68 const PetscInt HighLowFlags[5] = { 1, -1, 0, -1, 1 };
1098
1099
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
68 PetscFunctionBegin;
1100
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
68 PetscCall(STSetWorkVecs(st,4));
1101 68 filtlan->frame[0] = ctx->left;
1102 68 filtlan->frame[1] = ctx->inta;
1103 68 filtlan->frame[2] = ctx->intb;
1104 68 filtlan->frame[3] = ctx->right;
1105
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
68 if (filtlan->frame[0] == filtlan->frame[1]) { /* low pass filter, convert it to high pass filter */
1106 /* T = frame[3]*eye(n) - A */
1107 PetscCall(MatDestroy(&ctx->T));
1108 PetscCall(MatDuplicate(st->A[0],MAT_COPY_VALUES,&ctx->T));
1109 PetscCall(MatScale(ctx->T,-1.0));
1110 alpha = filtlan->frame[3];
1111 PetscCall(MatShift(ctx->T,alpha));
1112 for (i=0;i<4;i++) frame2[i] = filtlan->frame[3] - filtlan->frame[3-i];
1113 } else { /* it can be a mid-pass filter or a high-pass filter */
1114
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
68 if (filtlan->frame[0] == 0.0) {
1115 PetscCall(PetscObjectReference((PetscObject)st->A[0]));
1116 PetscCall(MatDestroy(&ctx->T));
1117 ctx->T = st->A[0];
1118 for (i=0;i<4;i++) frame2[i] = filtlan->frame[i];
1119 } else {
1120 /* T = A - frame[0]*eye(n) */
1121
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
68 PetscCall(MatDestroy(&ctx->T));
1122
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
68 PetscCall(MatDuplicate(st->A[0],MAT_COPY_VALUES,&ctx->T));
1123 68 alpha = -filtlan->frame[0];
1124
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
68 PetscCall(MatShift(ctx->T,alpha));
1125
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
340 for (i=0;i<4;i++) frame2[i] = filtlan->frame[i] - filtlan->frame[0];
1126 }
1127 }
1128
1129 /* no need to recompute filter if the parameters did not change */
1130
3/4
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 8 times.
68 if (st->state==ST_STATE_INITIAL || ctx->filtch) {
1131
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
60 PetscCall(FILTLAN_GetIntervals(filtlan->intervals,frame2,ctx->polyDegree,filtlan->baseDegree,filtlan->opts,filtlan->filterInfo));
1132 /* translate the intervals back */
1133
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
60 if (filtlan->frame[0] == filtlan->frame[1]) { /* low pass filter, convert it to high pass filter */
1134 for (i=0;i<4;i++) filtlan->intervals2[i] = filtlan->frame[3] - filtlan->intervals[3-i];
1135 } else { /* it can be a mid-pass filter or a high-pass filter */
1136
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
60 if (filtlan->frame[0] == 0.0) {
1137 for (i=0;i<6;i++) filtlan->intervals2[i] = filtlan->intervals[i];
1138 } else {
1139
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
420 for (i=0;i<6;i++) filtlan->intervals2[i] = filtlan->intervals[i] + filtlan->frame[0];
1140 }
1141 }
1142
1143
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
60 npoints = (filtlan->filterInfo->filterType == 2)? 6: 4;
1144
5/8
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 6 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
60 PetscCall(PetscFree(filtlan->baseFilter));
1145
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
60 PetscCall(PetscMalloc1((2*filtlan->baseDegree+2)*(npoints-1),&filtlan->baseFilter));
1146
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
60 PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(filtlan->baseFilter,filtlan->intervals,npoints,HighLowFlags,filtlan->baseDegree));
1147
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
60 PetscCall(PetscInfo(st,"Computed value of yLimit = %g\n",(double)filtlan->filterInfo->yLimit));
1148 }
1149 68 ctx->filtch = PETSC_FALSE;
1150
1151 /* create shell matrix*/
1152
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 8 times.
68 if (!*G) {
1153
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
60 PetscCall(MatGetSize(ctx->T,&N,&M));
1154
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
60 PetscCall(MatGetLocalSize(ctx->T,&n,&m));
1155
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
60 PetscCall(MatCreateShell(PetscObjectComm((PetscObject)st),n,m,N,M,st,G));
1156
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
60 PetscCall(MatShellSetOperation(*G,MATOP_MULT,(PetscErrorCodeFn*)MatMult_FILTLAN));
1157
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
60 PetscCall(MatShellSetMatProductOperation(*G,MATPRODUCT_AB,NULL,MatMatMult_FILTLAN,NULL,MATDENSE,MATDENSE));
1158
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
60 PetscCall(MatShellSetMatProductOperation(*G,MATPRODUCT_AB,NULL,MatMatMult_FILTLAN,NULL,MATDENSECUDA,MATDENSECUDA));
1159
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
60 PetscCall(MatShellSetMatProductOperation(*G,MATPRODUCT_AB,NULL,MatMatMult_FILTLAN,NULL,MATDENSEHIP,MATDENSEHIP));
1160 }
1161
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
16 PetscFunctionReturn(PETSC_SUCCESS);
1162 }
1163
1164 211 static PetscErrorCode STFilterGetThreshold_Filter_FILTLAN(ST st,PetscReal *gamma)
1165 {
1166 211 ST_FILTER *ctx = (ST_FILTER*)st->data;
1167 211 FILTLAN_CTX filtlan = (FILTLAN_CTX)ctx->data;
1168
1169
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
211 PetscFunctionBegin;
1170 211 *gamma = filtlan->filterInfo->yLimit;
1171
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
211 PetscFunctionReturn(PETSC_SUCCESS);
1172 }
1173
1174 52 static PetscErrorCode STDestroy_Filter_FILTLAN(ST st)
1175 {
1176 52 ST_FILTER *ctx = (ST_FILTER*)st->data;
1177 52 FILTLAN_CTX filtlan = (FILTLAN_CTX)ctx->data;
1178
1179
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
52 PetscFunctionBegin;
1180
5/8
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 6 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
52 PetscCall(PetscFree(filtlan->opts));
1181
5/8
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 6 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
52 PetscCall(PetscFree(filtlan->filterInfo));
1182
5/8
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 6 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
52 PetscCall(PetscFree(filtlan->baseFilter));
1183
6/8
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 6 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
52 PetscCall(PetscFree(filtlan));
1184
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
12 PetscFunctionReturn(PETSC_SUCCESS);
1185 }
1186
1187 52 PetscErrorCode STCreate_Filter_FILTLAN(ST st)
1188 {
1189 52 ST_FILTER *ctx = (ST_FILTER*)st->data;
1190 52 FILTLAN_CTX filtlan;
1191 52 FILTLAN_IOP iop;
1192 52 FILTLAN_PFI pfi;
1193
1194
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
52 PetscFunctionBegin;
1195
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
52 PetscCall(PetscNew(&filtlan));
1196 52 ctx->data = (void*)filtlan;
1197 52 filtlan->baseDegree = 10;
1198
1199
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
52 PetscCall(PetscNew(&iop));
1200 52 filtlan->opts = iop;
1201 52 iop->intervalWeights[0] = 100.0;
1202 52 iop->intervalWeights[1] = 1.0;
1203 52 iop->intervalWeights[2] = 1.0;
1204 52 iop->intervalWeights[3] = 1.0;
1205 52 iop->intervalWeights[4] = 100.0;
1206 52 iop->transIntervalRatio = 0.6;
1207 52 iop->reverseInterval = PETSC_FALSE;
1208 52 iop->initialPlateau = 0.1;
1209 52 iop->plateauShrinkRate = 1.5;
1210 52 iop->initialShiftStep = 0.01;
1211 52 iop->shiftStepExpanRate = 1.5;
1212 52 iop->maxInnerIter = 30;
1213 52 iop->yLimitTol = 0.0001;
1214 52 iop->maxOuterIter = 50;
1215 52 iop->numGridPoints = 200;
1216 52 iop->yBottomLine = 0.001;
1217 52 iop->yRippleLimit = 0.8;
1218
1219
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 6 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
52 PetscCall(PetscNew(&pfi));
1220 52 filtlan->filterInfo = pfi;
1221
1222 52 ctx->computeoperator = STComputeOperator_Filter_FILTLAN;
1223 52 ctx->getthreshold = STFilterGetThreshold_Filter_FILTLAN;
1224 52 ctx->destroy = STDestroy_Filter_FILTLAN;
1225
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
52 PetscFunctionReturn(PETSC_SUCCESS);
1226 }
1227