GCC Code Coverage Report


Directory: ./
File: src/sys/classes/st/impls/filter/filtlan.c
Date: 2026-04-06 03:57:41
Exec Total Coverage
Lines: 585 609 96.1%
Functions: 18 18 100.0%
Branches: 716 1158 61.8%

Line Branch Exec Source
1 /*
2 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3 SLEPc - Scalable Library for Eigenvalue Problem Computations
4 Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
5
6 This file is part of SLEPc.
7 SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9 */
10 /*
11 This file is an adaptation of several subroutines from FILTLAN, the
12 Filtered Lanczos Package, authored by Haw-ren Fang and Yousef Saad.
13
14 More information at:
15 https://www-users.cs.umn.edu/~saad/software/filtlan
16
17 References:
18
19 [1] H. Fang and Y. Saad, "A filtered Lanczos procedure for extreme and interior
20 eigenvalue problems", SIAM J. Sci. Comput. 34(4):A2220-A2246, 2012.
21 */
22
23 #include <slepc/private/stimpl.h>
24 #include "filter.h"
25
26 static PetscErrorCode FILTLAN_FilteredConjugateResidualPolynomial(PetscReal*,PetscReal*,PetscInt,PetscReal*,PetscInt,PetscReal*,PetscInt);
27 static PetscReal FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(PetscReal*,PetscInt,PetscReal*,PetscInt,PetscReal);
28 static PetscErrorCode FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(PetscInt,PetscReal,PetscReal,PetscReal*,PetscReal*,PetscReal*,PetscReal*);
29
30 /* ////////////////////////////////////////////////////////////////////////////
31 // Newton - Hermite Polynomial Interpolation
32 //////////////////////////////////////////////////////////////////////////// */
33
34 /*
35 FILTLAN function NewtonPolynomial
36
37 build P(z) by Newton's divided differences in the form
38 P(z) = a(1) + a(2)*(z-x(1)) + a(3)*(z-x(1))*(z-x(2)) + ... + a(n)*(z-x(1))*...*(z-x(n-1)),
39 such that P(x(i)) = y(i) for i=1,...,n, where
40 x,y are input vectors of length n, and a is the output vector of length n
41 if x(i)==x(j) for some i!=j, then it is assumed that the derivative of P(z) is to be zero at x(i),
42 and the Hermite polynomial interpolation is applied
43 in general, if there are k x(i)'s with the same value x0, then
44 the j-th order derivative of P(z) is zero at z=x0 for j=1,...,k-1
45 */
46 19696 static inline PetscErrorCode FILTLAN_NewtonPolynomial(PetscInt n,PetscReal *x,PetscReal *y,PetscReal *sa,PetscReal *sf)
47 {
48 19696 PetscReal d,*sx=x,*sy=y;
49 19696 PetscInt j,k;
50
51
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
19696 PetscFunctionBegin;
52
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
19696 PetscCall(PetscArraycpy(sf,sy,n));
53
54 /* apply Newton's finite difference method */
55 19696 sa[0] = sf[0];
56
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
433312 for (j=1;j<n;j++) {
57
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
4963392 for (k=n-1;k>=j;k--) {
58 4549776 d = sx[k]-sx[k-j];
59
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
4549776 if (PetscUnlikely(d == 0.0)) sf[k] = 0.0; /* assume that the derivative is 0.0 and apply the Hermite interpolation */
60 2192036 else sf[k] = (sf[k]-sf[k-1]) / d;
61 }
62 413616 sa[j] = sf[j];
63 }
64
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
3584 PetscFunctionReturn(PETSC_SUCCESS);
65 }
66
67 /*
68 FILTLAN function HermiteBaseFilterInChebyshevBasis
69
70 compute a base filter P(z) which is a continuous, piecewise polynomial P(z) expanded
71 in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials in each interval
72
73 The base filter P(z) equals P_j(z) for z in the j-th interval [intv(j), intv(j+1)), where
74 P_j(z) a Hermite interpolating polynomial
75
76 input:
77 intv is a vector which defines the intervals; the j-th interval is [intv(j), intv(j+1))
78 HiLowFlags determines the shape of the base filter P(z)
79 Consider the j-th interval [intv(j), intv(j+1)]
80 HighLowFlag[j-1]==1, P(z)==1 for z in [intv(j), intv(j+1)]
81 ==0, P(z)==0 for z in [intv(j), intv(j+1)]
82 ==-1, [intv(j), intv(j+1)] is a transition interval;
83 P(intv(j)) and P(intv(j+1)) are defined such that P(z) is continuous
84 baseDeg is the degree of smoothness of the Hermite (piecewise polynomial) interpolation
85 to be precise, the i-th derivative of P(z) is zero, i.e. d^{i}P(z)/dz^i==0, at all interval
86 end points z=intv(j) for i=1,...,baseDeg
87
88 output:
89 P(z) expanded in a basis of `translated' (scale-and-shift) Chebyshev polynomials
90 to be precise, for z in the j-th interval [intv(j),intv(j+1)), P(z) equals
91 P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
92 where S_i(z) is the `translated' Chebyshev polynomial in that interval,
93 S_i((z-c)/h) = T_i(z), c = (intv(j)+intv(j+1))) / 2, h = (intv(j+1)-intv(j)) / 2,
94 with T_i(z) the Chebyshev polynomial of the first kind,
95 T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
96 the return matrix is the matrix of Chebyshev coefficients pp just described
97
98 note that the degree of P(z) in each interval is (at most) 2*baseDeg+1, with 2*baseDeg+2 coefficients
99 let n be the length of intv; then there are n-1 intervals
100 therefore the return matrix pp is of size (2*baseDeg+2)-by-(n-1)
101 */
102 9848 static PetscErrorCode FILTLAN_HermiteBaseFilterInChebyshevBasis(PetscReal *baseFilter,PetscReal *intv,PetscInt npoints,const PetscInt *HighLowFlags,PetscInt baseDeg)
103 {
104 9848 PetscInt m,ii,jj;
105 9848 PetscReal flag,flag0,flag2,aa,bb,*px,*py,*sx,*sy,*pp,*qq,*sq,*sbf,*work,*currentPoint = intv;
106 9848 const PetscInt *hilo = HighLowFlags;
107
108
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
9848 PetscFunctionBegin;
109 9848 m = 2*baseDeg+2;
110 9848 jj = npoints-1; /* jj is initialized as the number of intervals */
111
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
9848 PetscCall(PetscMalloc5(m,&px,m,&py,m,&pp,m,&qq,m,&work));
112 sbf = baseFilter;
113
114
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
59088 while (jj--) { /* the main loop to compute the Chebyshev coefficients */
115
116 49240 flag = (PetscReal)(*hilo++); /* get the flag of the current interval */
117
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
49240 if (flag == -1.0) { /* flag == -1 means that the current interval is a transition polynomial */
118
119 19696 flag2 = (PetscReal)(*hilo); /* get flag2, the flag of the next interval */
120 19696 flag0 = 1.0-flag2; /* the flag of the previous interval is 1-flag2 */
121
122 /* two pointers for traversing x[] and y[] */
123 19696 sx = px;
124 19696 sy = py;
125
126 /* find the current interval [aa,bb] */
127 19696 aa = *currentPoint++;
128 19696 bb = *currentPoint;
129
130 /* now left-hand side */
131 19696 ii = baseDeg+1;
132
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
236352 while (ii--) {
133 216656 *sy++ = flag0;
134 216656 *sx++ = aa;
135 }
136
137 /* now right-hand side */
138 ii = baseDeg+1;
139
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
236352 while (ii--) {
140 216656 *sy++ = flag2;
141 216656 *sx++ = bb;
142 }
143
144 /* build a Newton polynomial (indeed, the generalized Hermite interpolating polynomial) with x[] and y[] */
145
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
19696 PetscCall(FILTLAN_NewtonPolynomial(m,px,py,pp,work));
146
147 /* pp contains coefficients of the Newton polynomial P(z) in the current interval [aa,bb], where
148 P(z) = pp(1) + pp(2)*(z-px(1)) + pp(3)*(z-px(1))*(z-px(2)) + ... + pp(n)*(z-px(1))*...*(z-px(n-1)) */
149
150 /* translate the Newton coefficients to the Chebyshev coefficients */
151
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
19696 PetscCall(FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(m,aa,bb,pp,px,qq,work));
152 /* qq contains coefficients of the polynomial in [aa,bb] in the `translated' Chebyshev basis */
153
154 /* copy the Chebyshev coefficients to baseFilter
155 OCTAVE/MATLAB: B(:,j) = qq, where j = (npoints-1)-jj and B is the return matrix */
156 19696 sq = qq;
157 19696 ii = 2*baseDeg+2;
158
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
453008 while (ii--) *sbf++ = *sq++;
159
160 } else {
161
162 /* a constant polynomial P(z)=flag, where either flag==0 or flag==1
163 OCTAVE/MATLAB: B(1,j) = flag, where j = (npoints-1)-jj and B is the return matrix */
164 29544 *sbf++ = flag;
165
166 /* the other coefficients are all zero, since P(z) is a constant
167 OCTAVE/MATLAB: B(1,j) = 0, where j = (npoints-1)-jj and B is the return matrix */
168 29544 ii = 2*baseDeg+1;
169
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
649968 while (ii--) *sbf++ = 0.0;
170
171 /* for the next point */
172 29544 currentPoint++;
173 }
174 }
175
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
9848 PetscCall(PetscFree5(px,py,pp,qq,work));
176
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
1792 PetscFunctionReturn(PETSC_SUCCESS);
177 }
178
179 /* ////////////////////////////////////////////////////////////////////////////
180 // Base Filter
181 //////////////////////////////////////////////////////////////////////////// */
182
183 /*
184 FILTLAN function GetIntervals
185
186 this routine determines the intervals (including the transition one(s)) by an iterative process
187
188 frame is a vector consisting of 4 ordered elements:
189 [frame(1),frame(4)] is the interval which (tightly) contains all eigenvalues, and
190 [frame(2),frame(3)] is the interval in which the eigenvalues are sought
191 baseDeg is the left-and-right degree of the base filter for each interval
192 polyDeg is the (maximum possible) degree of s(z), with z*s(z) the polynomial filter
193 intv is the output; the j-th interval is [intv(j),intv(j+1))
194 opts is a collection of interval options
195
196 the base filter P(z) is a piecewise polynomial from Hermite interpolation with degree baseDeg
197 at each end point of intervals
198
199 the polynomial filter Q(z) is in the form z*s(z), i.e. Q(0)==0, such that ||(1-P(z))-z*s(z)||_w is
200 minimized with s(z) a polynomial of degree up to polyDeg
201
202 the resulting polynomial filter Q(z) satisfies Q(x)>=Q(y) for x in [frame[1],frame[2]], and
203 y in [frame[0],frame[3]] but not in [frame[1],frame[2]]
204
205 the routine fills a PolynomialFilterInfo struct which gives some information of the polynomial filter
206 */
207 78 static PetscErrorCode FILTLAN_GetIntervals(PetscReal *intervals,PetscReal *frame,PetscInt polyDeg,PetscInt baseDeg,FILTLAN_IOP opts,FILTLAN_PFI filterInfo)
208 {
209 78 PetscReal intv[6],x,y,z1,z2,c,c1,c2,fc,fc2,halfPlateau,leftDelta,rightDelta,gridSize;
210 78 PetscReal yLimit,ySummit,yLeftLimit,yRightLimit,bottom,qIndex,*baseFilter,*polyFilter;
211 78 PetscReal yLimitGap=0.0,yLeftSummit=0.0,yLeftBottom=0.0,yRightSummit=0.0,yRightBottom=0.0;
212 78 PetscInt i,ii,npoints,numIter,numLeftSteps=1,numRightSteps=1,numMoreLooked=0;
213 78 PetscBool leftBoundaryMet=PETSC_FALSE,rightBoundaryMet=PETSC_FALSE,stepLeft,stepRight;
214 78 const PetscReal a=frame[0],a1=frame[1],b1=frame[2],b=frame[3];
215 78 const PetscInt HighLowFlags[5] = { 1, -1, 0, -1, 1 }; /* if filterType is 1, only first 3 elements will be used */
216 78 const PetscInt numLookMore = 2*(PetscInt)(0.5+(PetscLogReal(2.0)/PetscLogReal(opts->shiftStepExpanRate)));
217
218
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
78 PetscFunctionBegin;
219
3/8
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 10 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 10 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
78 PetscCheck(a<=a1 && a1<=b1 && b1<=b,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Values in the frame vector should be non-decreasing");
220
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
78 PetscCheck(a1!=b1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"The range of wanted eigenvalues cannot be of size zero");
221 78 filterInfo->filterType = 2; /* mid-pass filter, for interior eigenvalues */
222
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
78 if (b == b1) {
223 PetscCheck(a!=a1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"A polynomial filter should not cover all eigenvalues");
224 filterInfo->filterType = 1; /* high-pass filter, for largest eigenvalues */
225
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
78 } else PetscCheck(a!=a1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"filterType==3 for smallest eigenvalues should be pre-converted to filterType==1 for largest eigenvalues");
226
227 /* the following recipe follows Yousef Saad (2005, 2006) with a few minor adaptations / enhancements */
228 78 halfPlateau = 0.5*(b1-a1)*opts->initialPlateau; /* half length of the "plateau" of the (dual) base filter */
229 78 leftDelta = (b1-a1)*opts->initialShiftStep; /* initial left shift */
230 78 rightDelta = leftDelta; /* initial right shift */
231 78 opts->numGridPoints = PetscMax(opts->numGridPoints,(PetscInt)(2.0*(b-a)/halfPlateau));
232 78 gridSize = (b-a) / (PetscReal)opts->numGridPoints;
233
234
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
546 for (i=0;i<6;i++) intv[i] = 0.0;
235
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
78 if (filterInfo->filterType == 2) { /* for interior eigenvalues */
236 78 npoints = 6;
237 78 intv[0] = a;
238 78 intv[5] = b;
239 /* intv[1], intv[2], intv[3], and intv[4] to be determined */
240 } else { /* filterType == 1 (or 3 with conversion), for extreme eigenvalues */
241 npoints = 4;
242 intv[0] = a;
243 intv[3] = b;
244 /* intv[1], and intv[2] to be determined */
245 }
246 78 z1 = a1 - leftDelta;
247 78 z2 = b1 + rightDelta;
248 78 filterInfo->filterOK = 0; /* not yet found any OK filter */
249
250 /* allocate matrices */
251
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
78 PetscCall(PetscMalloc2((polyDeg+2)*(npoints-1),&polyFilter,(2*baseDeg+2)*(npoints-1),&baseFilter));
252
253 /* initialize the intervals, mainly for the case opts->maxOuterIter == 0 */
254 78 intervals[0] = intv[0];
255 78 intervals[3] = intv[3];
256 78 intervals[5] = intv[5];
257 78 intervals[1] = z1;
258
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
78 if (filterInfo->filterType == 2) { /* a mid-pass filter for interior eigenvalues */
259 78 intervals[4] = z2;
260 78 c = (a1+b1) / 2.0;
261 78 intervals[2] = c - halfPlateau;
262 78 intervals[3] = c + halfPlateau;
263 } else { /* filterType == 1 (or 3 with conversion) for extreme eigenvalues */
264 intervals[2] = z1 + (b1-z1)*opts->transIntervalRatio;
265 }
266
267 /* the main loop */
268
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
790 for (numIter=1;numIter<=opts->maxOuterIter;numIter++) {
269
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
790 if (z1 <= a) { /* outer loop updates z1 and z2 */
270 190 z1 = a;
271 190 leftBoundaryMet = PETSC_TRUE;
272 }
273
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
790 if (filterInfo->filterType == 2) { /* a <= z1 < (a1) */
274
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
790 if (z2 >= b) { /* a mid-pass filter for interior eigenvalues */
275 z2 = b;
276 rightBoundaryMet = PETSC_TRUE;
277 }
278 /* a <= z1 < c-h < c+h < z2 <= b, where h is halfPlateau */
279 /* [z1, c-h] and [c+h, z2] are transition interval */
280 790 intv[1] = z1;
281 790 intv[4] = z2;
282 790 c1 = z1 + halfPlateau;
283 790 intv[2] = z1; /* i.e. c1 - halfPlateau */
284 790 intv[3] = c1 + halfPlateau;
285
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
790 PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg));
286
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
790 PetscCall(FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg));
287 /* fc1 = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1); */
288 790 c2 = z2 - halfPlateau;
289 790 intv[2] = c2 - halfPlateau;
290 790 intv[3] = z2; /* i.e. c2 + halfPlateau */
291
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
790 PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg));
292
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
790 PetscCall(FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg));
293 790 fc2 = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1);
294 790 yLimitGap = PETSC_MAX_REAL;
295 790 ii = opts->maxInnerIter;
296
3/4
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
8980 while (ii-- && !(yLimitGap <= opts->yLimitTol)) {
297 /* recursive bisection to get c such that p(a1) are p(b1) approximately the same */
298 8190 c = (c1+c2) / 2.0;
299 8190 intv[2] = c - halfPlateau;
300 8190 intv[3] = c + halfPlateau;
301
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
8190 PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg));
302
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
8190 PetscCall(FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg));
303 8190 fc = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1);
304
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
8190 if (fc*fc2 < 0.0) {
305 c1 = c;
306 /* fc1 = fc; */
307 } else {
308 4547 c2 = c;
309 4547 fc2 = fc;
310 }
311 8190 yLimitGap = PetscAbsReal(fc);
312 }
313 } else { /* filterType == 1 (or 3 with conversion) for extreme eigenvalues */
314 intv[1] = z1;
315 intv[2] = z1 + (b1-z1)*opts->transIntervalRatio;
316 PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,4,HighLowFlags,baseDeg));
317 PetscCall(FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,4,opts->intervalWeights,polyDeg));
318 }
319 /* polyFilter contains the coefficients of the polynomial filter which approximates phi(x)
320 expanded in the `translated' Chebyshev basis */
321 /* psi(x) = 1.0 - phi(x) is the dual base filter approximated by a polynomial in the form x*p(x) */
322 790 yLeftLimit = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1);
323 790 yRightLimit = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1);
324
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
790 yLimit = (yLeftLimit < yRightLimit) ? yLeftLimit : yRightLimit;
325
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
790 ySummit = (yLeftLimit > yRightLimit) ? yLeftLimit : yRightLimit;
326 x = a1;
327
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
31600 while ((x+=gridSize) < b1) {
328 30810 y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
329
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
30810 if (y < yLimit) yLimit = y;
330
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
30810 if (y > ySummit) ySummit = y;
331 }
332 /* now yLimit is the minimum of x*p(x) for x in [a1, b1] */
333 790 stepLeft = PETSC_FALSE;
334 790 stepRight = PETSC_FALSE;
335
6/6
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
✓ Branch 4 taken 10 times.
✓ Branch 5 taken 10 times.
790 if ((yLimit < yLeftLimit && yLimit < yRightLimit) || yLimit < opts->yBottomLine) {
336 /* very bad, step to see what will happen */
337 240 stepLeft = PETSC_TRUE;
338
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
240 if (filterInfo->filterType == 2) stepRight = PETSC_TRUE;
339
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
550 } else if (filterInfo->filterType == 2) {
340
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
550 if (yLeftLimit < yRightLimit) {
341
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
302 if (yRightLimit-yLeftLimit > opts->yLimitTol) stepLeft = PETSC_TRUE;
342
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
248 } else if (yLeftLimit-yRightLimit > opts->yLimitTol) stepRight = PETSC_TRUE;
343 }
344
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
790 if (!stepLeft) {
345 yLeftBottom = yLeftLimit;
346 x = a1;
347
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
28144 while ((x-=gridSize) >= a) {
348 28094 y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
349
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
28094 if (y < yLeftBottom) yLeftBottom = y;
350
2/2
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 1 times.
500 else if (y > yLeftBottom) break;
351 }
352 yLeftSummit = yLeftBottom;
353
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
1670216 while ((x-=gridSize) >= a) {
354 1669666 y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
355
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
1669666 if (y > yLeftSummit) {
356 23956 yLeftSummit = y;
357
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
23956 if (yLeftSummit > yLimit*opts->yRippleLimit) {
358 stepLeft = PETSC_TRUE;
359 break;
360 }
361 }
362
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
1669666 if (y < yLeftBottom) yLeftBottom = y;
363 }
364 }
365
3/4
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
790 if (filterInfo->filterType == 2 && !stepRight) {
366 yRightBottom = yRightLimit;
367 x = b1;
368
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
29778 while ((x+=gridSize) <= b) {
369 29778 y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
370
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
29778 if (y < yRightBottom) yRightBottom = y;
371
2/2
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 1 times.
550 else if (y > yRightBottom) break;
372 }
373 yRightSummit = yRightBottom;
374
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
1331692 while ((x+=gridSize) <= b) {
375 1331142 y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
376
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
1331142 if (y > yRightSummit) {
377 23980 yRightSummit = y;
378
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
23980 if (yRightSummit > yLimit*opts->yRippleLimit) {
379 stepRight = PETSC_TRUE;
380 break;
381 }
382 }
383
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
1331142 if (y < yRightBottom) yRightBottom = y;
384 }
385 }
386
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
790 if (!stepLeft && !stepRight) {
387
3/4
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
550 if (filterInfo->filterType == 2) bottom = PetscMin(yLeftBottom,yRightBottom);
388 else bottom = yLeftBottom;
389 550 qIndex = 1.0 - (yLimit-bottom) / (ySummit-bottom);
390
4/4
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
550 if (filterInfo->filterOK == 0 || filterInfo->filterQualityIndex < qIndex) {
391 /* found the first OK filter or a better filter */
392
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
1666 for (i=0;i<6;i++) intervals[i] = intv[i];
393 238 filterInfo->filterOK = 1;
394 238 filterInfo->filterQualityIndex = qIndex;
395 238 filterInfo->numIter = numIter;
396 238 filterInfo->yLimit = yLimit;
397 238 filterInfo->ySummit = ySummit;
398 238 filterInfo->numLeftSteps = numLeftSteps;
399 238 filterInfo->yLeftSummit = yLeftSummit;
400 238 filterInfo->yLeftBottom = yLeftBottom;
401
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
238 if (filterInfo->filterType == 2) {
402 238 filterInfo->yLimitGap = yLimitGap;
403 238 filterInfo->numRightSteps = numRightSteps;
404 238 filterInfo->yRightSummit = yRightSummit;
405 238 filterInfo->yRightBottom = yRightBottom;
406 }
407 numMoreLooked = 0;
408
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
312 } else if (++numMoreLooked == numLookMore) {
409 /* filter has been optimal */
410 78 filterInfo->filterOK = 2;
411 78 break;
412 }
413 /* try stepping further to see whether it can improve */
414 472 stepLeft = PETSC_TRUE;
415
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
472 if (filterInfo->filterType == 2) stepRight = PETSC_TRUE;
416 }
417 /* check whether we can really "step" */
418
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
712 if (leftBoundaryMet) {
419
2/4
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 10 times.
✗ Branch 3 not taken.
140 if (filterInfo->filterType == 1 || rightBoundaryMet) break; /* cannot step further, so break the loop */
420
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
140 if (stepLeft) {
421 /* cannot step left, so try stepping right */
422 140 stepLeft = PETSC_FALSE;
423 140 stepRight = PETSC_TRUE;
424 }
425 }
426
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
712 if (rightBoundaryMet && stepRight) {
427 /* cannot step right, so try stepping left */
428 stepRight = PETSC_FALSE;
429 stepLeft = PETSC_TRUE;
430 }
431 /* now "step" */
432
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
712 if (stepLeft) {
433 572 numLeftSteps++;
434
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
572 if (filterInfo->filterType == 2) leftDelta *= opts->shiftStepExpanRate; /* expand the step for faster convergence */
435 572 z1 -= leftDelta;
436 }
437
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
712 if (stepRight) {
438 712 numRightSteps++;
439 712 rightDelta *= opts->shiftStepExpanRate; /* expand the step for faster convergence */
440 712 z2 += rightDelta;
441 }
442
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
712 if (filterInfo->filterType == 2) {
443 /* shrink the "plateau" of the (dual) base filter */
444
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
712 if (stepLeft && stepRight) halfPlateau /= opts->plateauShrinkRate;
445 140 else halfPlateau /= PetscSqrtReal(opts->plateauShrinkRate);
446 }
447 }
448
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
78 PetscCheck(filterInfo->filterOK,PETSC_COMM_SELF,PETSC_ERR_CONV_FAILED,"STFILTER cannot get the filter specified; please adjust your filter parameters (e.g. increasing the polynomial degree)");
449
450 78 filterInfo->totalNumIter = numIter;
451
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
78 PetscCall(PetscFree2(polyFilter,baseFilter));
452
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
14 PetscFunctionReturn(PETSC_SUCCESS);
453 }
454
455 /* ////////////////////////////////////////////////////////////////////////////
456 // Chebyshev Polynomials
457 //////////////////////////////////////////////////////////////////////////// */
458
459 /*
460 FILTLAN function ExpandNewtonPolynomialInChebyshevBasis
461
462 translate the coefficients of a Newton polynomial to the coefficients
463 in a basis of the `translated' (scale-and-shift) Chebyshev polynomials
464
465 input:
466 a Newton polynomial defined by vectors a and x:
467 P(z) = a(1) + a(2)*(z-x(1)) + a(3)*(z-x(1))*(z-x(2)) + ... + a(n)*(z-x(1))*...*(z-x(n-1))
468 the interval [aa,bb] defines the `translated' Chebyshev polynomials S_i(z) = T_i((z-c)/h),
469 where c=(aa+bb)/2 and h=(bb-aa)/2, and T_i is the Chebyshev polynomial of the first kind
470 note that T_i is defined by T_0(z)=1, T_1(z)=z, and T_i(z)=2*z*T_{i-1}(z)+T_{i-2}(z) for i>=2
471
472 output:
473 a vector q containing the Chebyshev coefficients:
474 P(z) = q(1)*S_0(z) + q(2)*S_1(z) + ... + q(n)*S_{n-1}(z)
475 */
476 19696 static inline PetscErrorCode FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(PetscInt n,PetscReal aa,PetscReal bb,PetscReal *a,PetscReal *x,PetscReal *q,PetscReal *q2)
477 {
478 19696 PetscInt m,mm;
479 19696 PetscReal *sa,*sx,*sq,*sq2,c,c2,h,h2;
480
481
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
19696 PetscFunctionBegin;
482 19696 sa = a+n; /* pointers for traversing a and x */
483 19696 sx = x+n-1;
484 19696 *q = *--sa; /* set q[0] = a(n) */
485
486 19696 c = (aa+bb)/2.0;
487 19696 h = (bb-aa)/2.0;
488 19696 h2 = h/2.0;
489
490
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
433312 for (m=1;m<=n-1;m++) { /* the main loop for translation */
491
492 /* compute q2[0:m-1] = (c-x[n-m-1])*q[0:m-1] */
493 413616 mm = m;
494 413616 sq = q;
495 413616 sq2 = q2;
496 413616 c2 = c-(*--sx);
497
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
4963392 while (mm--) *(sq2++) = c2*(*sq++);
498 413616 *sq2 = 0.0; /* q2[m] = 0.0 */
499 413616 *(q2+1) += h*(*q); /* q2[1] = q2[1] + h*q[0] */
500
501 /* compute q2[0:m-2] = q2[0:m-2] + h2*q[1:m-1] */
502 413616 mm = m-1;
503 413616 sq2 = q2;
504 413616 sq = q+1;
505
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
4549776 while (mm--) *(sq2++) += h2*(*sq++);
506
507 /* compute q2[2:m] = q2[2:m] + h2*q[1:m-1] */
508 413616 mm = m-1;
509 413616 sq2 = q2+2;
510 413616 sq = q+1;
511
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
4549776 while (mm--) *(sq2++) += h2*(*sq++);
512
513 /* compute q[0:m] = q2[0:m] */
514 413616 mm = m+1;
515 413616 sq2 = q2;
516 413616 sq = q;
517
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
5377008 while (mm--) *sq++ = *sq2++;
518 413616 *q += (*--sa); /* q[0] = q[0] + p[n-m-1] */
519 }
520
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
19696 PetscFunctionReturn(PETSC_SUCCESS);
521 }
522
523 /*
524 FILTLAN function PolynomialEvaluationInChebyshevBasis
525
526 evaluate P(z) at z=z0, where P(z) is a polynomial expanded in a basis of
527 the `translated' (i.e. scale-and-shift) Chebyshev polynomials
528
529 input:
530 c is a vector of Chebyshev coefficients which defines the polynomial
531 P(z) = c(1)*S_0(z) + c(2)*S_1(z) + ... + c(n)*S_{n-1}(z),
532 where S_i is the `translated' Chebyshev polynomial S_i((z-c)/h) = T_i(z), with
533 c = (intv(j)+intv(j+1)) / 2, h = (intv(j+1)-intv(j)) / 2
534 note that T_i(z) is the Chebyshev polynomial of the first kind,
535 T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
536
537 output:
538 the evaluated value of P(z) at z=z0
539 */
540 3109030 static inline PetscReal FILTLAN_PolynomialEvaluationInChebyshevBasis(PetscReal *pp,PetscInt m,PetscInt idx,PetscReal z0,PetscReal aa,PetscReal bb)
541 {
542 3109030 PetscInt ii,deg1;
543 3109030 PetscReal y,zz,t0,t1,t2,*sc;
544
545
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
3109030 PetscFunctionBegin;
546 3109030 deg1 = m;
547
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
3109030 if (aa==-1.0 && bb==1.0) zz = z0; /* treat it as a special case to reduce rounding errors */
548
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
3109030 else zz = (aa==bb)? 0.0 : -1.0+2.0*(z0-aa)/(bb-aa);
549
550 /* compute y = P(z0), where we utilize the Chebyshev recursion */
551 3109030 sc = pp+(idx-1)*m; /* sc points to column idx of pp */
552 3109030 y = *sc++;
553 3109030 t1 = 1.0; t2 = zz;
554 3109030 ii = deg1-1;
555
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
616613560 while (ii--) {
556 /* Chebyshev recursion: T_0(zz)=1, T_1(zz)=zz, and T_{i+1}(zz) = 2*zz*T_i(zz) + T_{i-1}(zz) for i>=2
557 the values of T_{i+1}(zz), T_i(zz), T_{i-1}(zz) are stored in t0, t1, t2, respectively */
558 613504530 t0 = 2*zz*t1 - t2;
559 /* it also works for the base case / the first iteration, where t0 equals 2*zz*1-zz == zz which is T_1(zz) */
560 613504530 t2 = t1;
561 613504530 t1 = t0;
562 613504530 y += (*sc++)*t0;
563 }
564
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
3109030 PetscFunctionReturn(y);
565 }
566
567 #define basisTranslated PETSC_TRUE
568 /*
569 FILTLAN function PiecewisePolynomialEvaluationInChebyshevBasis
570
571 evaluate P(z) at z=z0, where P(z) is a piecewise polynomial expanded
572 in a basis of the (optionally translated, i.e. scale-and-shift) Chebyshev polynomials for each interval
573
574 input:
575 intv is a vector which defines the intervals; the j-th interval is [intv(j), intv(j+1))
576 pp is a matrix of Chebyshev coefficients which defines a piecewise polynomial P(z)
577 in a basis of the `translated' Chebyshev polynomials in each interval
578 the polynomial P_j(z) in the j-th interval, i.e. when z is in [intv(j), intv(j+1)), is defined by the j-th column of pp:
579 if basisTranslated == false, then
580 P_j(z) = pp(1,j)*T_0(z) + pp(2,j)*T_1(z) + ... + pp(n,j)*T_{n-1}(z),
581 where T_i(z) is the Chebyshev polynomial of the first kind,
582 T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
583 if basisTranslated == true, then
584 P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
585 where S_i is the `translated' Chebyshev polynomial S_i((z-c)/h) = T_i(z), with
586 c = (intv(j)+intv(j+1)) / 2, h = (intv(j+1)-intv(j)) / 2
587
588 output:
589 the evaluated value of P(z) at z=z0
590
591 note that if z0 falls below the first interval, then the polynomial in the first interval will be used to evaluate P(z0)
592 if z0 flies over the last interval, then the polynomial in the last interval will be used to evaluate P(z0)
593 */
594 3109030 static inline PetscReal FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(PetscReal *pp,PetscInt m,PetscReal *intv,PetscInt npoints,PetscReal z0)
595 {
596 3109030 PetscReal *sintv,aa,bb,resul;
597 3109030 PetscInt idx;
598
599
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
3109030 PetscFunctionBegin;
600 /* determine the interval which contains z0 */
601 3109030 sintv = &intv[1];
602 3109030 idx = 1;
603
3/4
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
3109030 if (npoints>2 && z0 >= *sintv) {
604 1418286 sintv++;
605
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
5539508 while (++idx < npoints-1) {
606
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
4188354 if (z0 < *sintv) break;
607 4121222 sintv++;
608 }
609 }
610 /* idx==1 if npoints<=2; otherwise idx satisfies:
611 intv(idx) <= z0 < intv(idx+1), if 2 <= idx <= npoints-2
612 z0 < intv(idx+1), if idx == 1
613 intv(idx) <= z0, if idx == npoints-1
614 in addition, sintv points to &intv(idx+1) */
615 3109030 if (basisTranslated) {
616 /* the basis consists of `translated' Chebyshev polynomials */
617 /* find the interval of concern, [aa,bb] */
618 3109030 aa = *(sintv-1);
619 3109030 bb = *sintv;
620 3109030 resul = FILTLAN_PolynomialEvaluationInChebyshevBasis(pp,m,idx,z0,aa,bb);
621 } else {
622 /* the basis consists of standard Chebyshev polynomials, with interval [-1.0,1.0] for integration */
623 resul = FILTLAN_PolynomialEvaluationInChebyshevBasis(pp,m,idx,z0,-1.0,1.0);
624 }
625
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
3109030 PetscFunctionReturn(resul);
626 }
627
628 /*
629 FILTLAN function PiecewisePolynomialInnerProductInChebyshevBasis
630
631 compute the weighted inner product of two piecewise polynomials expanded
632 in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials for each interval
633
634 pp and qq are two matrices of Chebyshev coefficients which define the piecewise polynomials P(z) and Q(z), respectively
635 for z in the j-th interval, P(z) equals
636 P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
637 and Q(z) equals
638 Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(n,j)*S_{n-1}(z),
639 where S_i(z) is the `translated' Chebyshev polynomial in that interval,
640 S_i((z-c)/h) = T_i(z), c = (aa+bb)) / 2, h = (bb-aa) / 2,
641 with T_i(z) the Chebyshev polynomial of the first kind
642 T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
643
644 the (scaled) j-th interval inner product is defined by
645 <P_j,Q_j> = (Pi/2)*(pp(1,j)*qq(1,j) + sum_{k} pp(k,j)*qq(k,j)),
646 which comes from the property
647 <T_0,T_0>=pi, <T_i,T_i>=pi/2 for i>=1, and <T_i,T_j>=0 for i!=j
648
649 the weighted inner product is <P,Q> = sum_{j} intervalWeights(j)*<P_j,Q_j>,
650 which is the return value
651
652 note that for unit weights, pass an empty vector of intervalWeights (i.e. of length 0)
653 */
654 5713519 static inline PetscReal FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(PetscReal *pp,PetscInt prows,PetscInt pcols,PetscInt ldp,PetscReal *qq,PetscInt qrows,PetscInt qcols,PetscInt ldq,const PetscReal *intervalWeights)
655 {
656 5713519 PetscInt nintv,deg1,i,k;
657 5713519 PetscReal *sp,*sq,ans=0.0,ans2;
658
659
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
5713519 PetscFunctionBegin;
660 5713519 deg1 = PetscMin(prows,qrows); /* number of effective coefficients, one more than the effective polynomial degree */
661
2/14
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
5713519 if (PetscUnlikely(!deg1)) PetscFunctionReturn(0.0);
662 5713519 nintv = PetscMin(pcols,qcols); /* number of intervals */
663
664 /* scaled by intervalWeights(i) in the i-th interval (we assume intervalWeights[] are always provided).
665 compute ans = sum_{i=1,...,nintv} intervalWeights(i)*[ pp(1,i)*qq(1,i) + sum_{k=1,...,deg} pp(k,i)*qq(k,i) ] */
666
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
34281114 for (i=0;i<nintv;i++) { /* compute ans2 = pp(1,i)*qq(1,i) + sum_{k=1,...,deg} pp(k,i)*qq(k,i) */
667 28567595 sp = pp+i*ldp;
668 28567595 sq = qq+i*ldq;
669 28567595 ans2 = (*sp) * (*sq); /* the first term pp(1,i)*qq(1,i) is being added twice */
670
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
1756517235 for (k=0;k<deg1;k++) ans2 += (*sp++)*(*sq++); /* add pp(k,i)*qq(k,i) */
671 28567595 ans += ans2*intervalWeights[i]; /* compute ans += ans2*intervalWeights(i) */
672 }
673
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
5713519 PetscFunctionReturn(ans*PETSC_PI/2.0);
674 }
675
676 /*
677 FILTLAN function PiecewisePolynomialInChebyshevBasisMultiplyX
678
679 compute Q(z) = z*P(z), where P(z) and Q(z) are piecewise polynomials expanded
680 in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials for each interval
681
682 P(z) and Q(z) are stored as matrices of Chebyshev coefficients pp and qq, respectively
683
684 For z in the j-th interval, P(z) equals
685 P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
686 and Q(z) equals
687 Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(n,j)*S_{n-1}(z),
688 where S_i(z) is the `translated' Chebyshev polynomial in that interval,
689 S_i((z-c)/h) = T_i(z), c = (intv(j)+intv(j+1))) / 2, h = (intv(j+1)-intv(j)) / 2,
690 with T_i(z) the Chebyshev polynomial of the first kind
691 T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
692
693 the returned matrix is qq which represents Q(z) = z*P(z)
694 */
695 1906919 static inline PetscErrorCode FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(PetscReal *pp,PetscInt deg1,PetscInt ldp,PetscReal *intv,PetscInt nintv,PetscReal *qq,PetscInt ldq)
696 {
697 1906919 PetscInt i,j;
698 1906919 PetscReal c,h,h2,tmp,*sp,*sq,*sp2,*sq2;
699
700
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
1906919 PetscFunctionBegin;
701
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
11441514 for (j=0;j<nintv;j++) { /* consider interval between intv(j) and intv(j+1) */
702 9534595 sp = pp+j*ldp;
703 9534595 sq = qq+j*ldq;
704 9534595 sp2 = sp;
705 9534595 sq2 = sq+1;
706 9534595 c = 0.5*(intv[j] + intv[j+1]); /* compute c = (intv(j) + intv(j+1))/2 */
707 9534595 h = 0.5*(intv[j+1] - intv[j]); /* compute h = (intv(j+1) - intv(j))/2 and h2 = h/2 */
708 9534595 h2 = 0.5*h;
709 9534595 i = deg1;
710
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
771837035 while (i--) *sq++ = c*(*sp++); /* compute q(1:deg1,j) = c*p(1:deg1,j) */
711 9534595 *sq++ = 0.0; /* set q(deg1+1,j) = 0.0 */
712 9534595 *(sq2++) += h*(*sp2++); /* compute q(2,j) = q(2,j) + h*p(1,j) */
713 9534595 i = deg1-1;
714
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
762302440 while (i--) { /* compute q(3:deg1+1,j) += h2*p(2:deg1,j) and then q(1:deg1-1,j) += h2*p(2:deg1,j) */
715 752767845 tmp = h2*(*sp2++);
716 752767845 *(sq2-2) += tmp;
717 752767845 *(sq2++) += tmp;
718 }
719 }
720
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
1906919 PetscFunctionReturn(PETSC_SUCCESS);
721 }
722
723 /* ////////////////////////////////////////////////////////////////////////////
724 // Conjugate Residual Method in the Polynomial Space
725 //////////////////////////////////////////////////////////////////////////// */
726
727 /*
728 B := alpha*A + beta*B
729
730 A,B are nxk
731 */
732 7593660 static inline PetscErrorCode Mat_AXPY_BLAS(PetscInt n,PetscInt k,PetscReal alpha,const PetscReal *A,PetscInt lda,PetscReal beta,PetscReal *B,PetscInt ldb)
733 {
734 7593660 PetscInt i,j;
735
736
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
7593660 PetscFunctionBegin;
737
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
7593660 if (beta==(PetscReal)1.0) {
738
4/4
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
1560847100 for (j=0;j<k;j++) for (i=0;i<n;i++) B[i+j*ldb] += alpha*A[i+j*lda];
739
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
3806600 PetscCall(PetscLogFlops(2.0*n*k));
740 } else {
741
4/4
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
1556527110 for (j=0;j<k;j++) for (i=0;i<n;i++) B[i+j*ldb] = alpha*A[i+j*lda] + beta*B[i+j*ldb];
742
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
3787060 PetscCall(PetscLogFlops(3.0*n*k));
743 }
744
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
7593660 PetscFunctionReturn(PETSC_SUCCESS);
745 }
746
747 /*
748 FILTLAN function FilteredConjugateResidualPolynomial
749
750 ** Conjugate Residual Method in the Polynomial Space
751
752 this routine employs a conjugate-residual-type algorithm in polynomial space to minimize ||P(z)-Q(z)||_w,
753 where P(z), the base filter, is the input piecewise polynomial, and
754 Q(z) is the output polynomial satisfying Q(0)==1, i.e. the constant term of Q(z) is 1
755 niter is the number of conjugate-residual iterations; therefore, the degree of Q(z) is up to niter+1
756 both P(z) and Q(z) are expanded in the `translated' (scale-and-shift) Chebyshev basis for each interval,
757 and presented as matrices of Chebyshev coefficients, denoted by pp and qq, respectively
758
759 input:
760 intv is a vector which defines the intervals; the j-th interval is [intv(j),intv(j+1))
761 w is a vector of Chebyshev weights; the weight of j-th interval is w(j)
762 the interval weights define the inner product of two continuous functions and then
763 the derived w-norm ||P(z)-Q(z)||_w
764 pp is a matrix of Chebyshev coefficients which defines the piecewise polynomial P(z)
765 to be specific, for z in [intv(j), intv(j+1)), P(z) equals
766 P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(niter+2,j)*S_{niter+1}(z),
767 where S_i(z) is the `translated' Chebyshev polynomial in that interval,
768 S_i((z-c)/h) = T_i(z), c = (intv(j)+intv(j+1))) / 2, h = (intv(j+1)-intv(j)) / 2,
769 with T_i(z) the Chebyshev polynomial of the first kind,
770 T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
771
772 output:
773 the return matrix, denoted by qq, represents a polynomial Q(z) with degree up to 1+niter
774 and satisfying Q(0)==1, such that ||P(z))-Q(z)||_w is minimized
775 this polynomial Q(z) is expanded in the `translated' Chebyshev basis for each interval
776 to be precise, considering z in [intv(j), intv(j+1)), Q(z) equals
777 Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(niter+2,j)*S_{niter+1}(z)
778
779 note:
780 1. since Q(0)==1, P(0)==1 is expected; if P(0)!=1, one can translate P(z)
781 for example, if P(0)==0, one can use 1-P(z) as input instead of P(z)
782 2. typically, the base filter, defined by pp and intv, is from Hermite interpolation
783 in intervals [intv(j),intv(j+1)) for j=1,...,nintv, with nintv the number of intervals
784 */
785 9770 static PetscErrorCode FILTLAN_FilteredConjugateResidualPolynomial(PetscReal *cpol,PetscReal *baseFilter,PetscInt nbase,PetscReal *intv,PetscInt m,PetscReal *intervalWeights,PetscInt niter)
786 {
787 9770 PetscInt i,j,srpol,scpol,sarpol,sppol,sappol,ld,nintv;
788 9770 PetscReal rho,rho0,rho1,den,bet,alp,alp0,*ppol,*rpol,*appol,*arpol;
789
790
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
9770 PetscFunctionBegin;
791 9770 nintv = m-1;
792 9770 ld = niter+2; /* leading dimension */
793
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
9770 PetscCall(PetscCalloc4(ld*nintv,&ppol,ld*nintv,&rpol,ld*nintv,&appol,ld*nintv,&arpol));
794
4/6
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
9770 PetscCall(PetscArrayzero(cpol,ld*nintv));
795 /* initialize polynomial ppol to be 1 (i.e. multiplicative identity) in all intervals */
796 58620 sppol = 2;
797 58620 srpol = 2;
798 58620 scpol = 2;
799
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
58620 for (j=0;j<nintv;j++) {
800 48850 ppol[j*ld] = 1.0;
801 48850 rpol[j*ld] = 1.0;
802 48850 cpol[j*ld] = 1.0;
803 }
804 /* ppol is the initial p-polynomial (corresponding to the A-conjugate vector p in CG)
805 rpol is the r-polynomial (corresponding to the residual vector r in CG)
806 cpol is the "corrected" residual polynomial (result of this function) */
807 9770 sappol = 3;
808 9770 sarpol = 3;
809
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
9770 PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(ppol,sppol,ld,intv,nintv,appol,ld));
810
4/4
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
185630 for (i=0;i<3;i++) for (j=0;j<nintv;j++) arpol[i+j*ld] = appol[i+j*ld];
811 9770 rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
812
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
1349278 for (i=0;i<niter;i++) {
813 1347500 den = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(appol,sappol,nintv,ld,appol,sappol,nintv,ld,intervalWeights);
814 1347500 alp0 = rho/den;
815 1347500 rho1 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(baseFilter,nbase,nintv,nbase,appol,sappol,nintv,ld,intervalWeights);
816 1347500 alp = (rho-rho1)/den;
817 1347500 srpol++;
818 1347500 scpol++;
819
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
1347500 PetscCall(Mat_AXPY_BLAS(srpol,nintv,-alp0,appol,ld,1.0,rpol,ld));
820
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
1347500 PetscCall(Mat_AXPY_BLAS(scpol,nintv,-alp,appol,ld,1.0,cpol,ld));
821
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
1347500 if (i+1 == niter) break;
822 1337730 sarpol++;
823
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
1337730 PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(rpol,srpol,ld,intv,nintv,arpol,ld));
824 1337730 rho0 = rho;
825 1337730 rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
826 1337730 bet = rho / rho0;
827 1337730 sppol++;
828 1337730 sappol++;
829
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
1337730 PetscCall(Mat_AXPY_BLAS(sppol,nintv,1.0,rpol,ld,bet,ppol,ld));
830
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
1337730 PetscCall(Mat_AXPY_BLAS(sappol,nintv,1.0,arpol,ld,bet,appol,ld));
831 }
832
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
9770 PetscCall(PetscFree4(ppol,rpol,appol,arpol));
833
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
1778 PetscFunctionReturn(PETSC_SUCCESS);
834 }
835
836 /*
837 FILTLAN function FilteredConjugateResidualMatrixPolynomialVectorProduct
838
839 this routine employs a conjugate-residual-type algorithm in polynomial space to compute
840 x = x0 + s(A)*r0 with r0 = b - A*x0, such that ||(1-P(z))-z*s(z)||_w is minimized, where
841 P(z) is a given piecewise polynomial, called the base filter,
842 s(z) is a polynomial of degree up to niter, the number of conjugate-residual iterations,
843 and b and x0 are given vectors
844
845 note that P(z) is expanded in the `translated' (scale-and-shift) Chebyshev basis for each interval,
846 and presented as a matrix of Chebyshev coefficients pp
847
848 input:
849 A is a sparse matrix
850 x0, b are vectors
851 niter is the number of conjugate-residual iterations
852 intv is a vector which defines the intervals; the j-th interval is [intv(j),intv(j+1))
853 w is a vector of Chebyshev weights; the weight of j-th interval is w(j)
854 the interval weights define the inner product of two continuous functions and then
855 the derived w-norm ||P(z)-Q(z)||_w
856 pp is a matrix of Chebyshev coefficients which defines the piecewise polynomial P(z)
857 to be specific, for z in [intv(j), intv(j+1)), P(z) equals
858 P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(niter+2,j)*S_{niter+1}(z),
859 where S_i(z) is the `translated' Chebyshev polynomial in that interval,
860 S_i((z-c)/h) = T_i(z), c = (intv(j)+intv(j+1))) / 2, h = (intv(j+1)-intv(j)) / 2,
861 with T_i(z) the Chebyshev polynomial of the first kind,
862 T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
863 tol is the tolerance; if the residual polynomial in z-norm is dropped by a factor lower
864 than tol, then stop the conjugate-residual iteration
865
866 output:
867 the return vector is x = x0 + s(A)*r0 with r0 = b - A*x0, such that ||(1-P(z))-z*s(z)||_w is minimized,
868 subject to that s(z) is a polynomial of degree up to niter, where P(z) is the base filter
869 in short, z*s(z) approximates 1-P(z)
870
871 note:
872 1. since z*s(z) approximates 1-P(z), P(0)==1 is expected; if P(0)!=1, one can translate P(z)
873 for example, if P(0)==0, one can use 1-P(z) as input instead of P(z)
874 2. typically, the base filter, defined by pp and intv, is from Hermite interpolation
875 in intervals [intv(j),intv(j+1)) for j=1,...,nintv, with nintv the number of intervals
876 3. a common application is to compute R(A)*b, where R(z) approximates 1-P(z)
877 in this case, one can set x0 = 0 and then the return vector is x = s(A)*b, where
878 z*s(z) approximates 1-P(z); therefore, A*x is the wanted R(A)*b
879 */
880 3248 static PetscErrorCode FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProduct(Mat A,Vec b,Vec x,PetscReal *baseFilter,PetscInt nbase,PetscReal *intv,PetscInt nintv,PetscReal *intervalWeights,PetscInt niter,Vec *work)
881 {
882 3248 PetscInt i,j,srpol,scpol,sarpol,sppol,sappol,ld;
883 3248 PetscReal rho,rho0,rho00,rho1,den,bet,alp,alp0,*cpol,*ppol,*rpol,*appol,*arpol,tol=0.0;
884 3248 Vec r=work[0],p=work[1],ap=work[2],w=work[3];
885 3248 PetscScalar alpha;
886
887
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
3248 PetscFunctionBegin;
888 3248 ld = niter+3; /* leading dimension */
889
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
3248 PetscCall(PetscCalloc5(ld*nintv,&ppol,ld*nintv,&rpol,ld*nintv,&cpol,ld*nintv,&appol,ld*nintv,&arpol));
890 /* initialize polynomial ppol to be 1 (i.e. multiplicative identity) in all intervals */
891 19488 sppol = 2;
892 19488 srpol = 2;
893 19488 scpol = 2;
894
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
19488 for (j=0;j<nintv;j++) {
895 16240 ppol[j*ld] = 1.0;
896 16240 rpol[j*ld] = 1.0;
897 16240 cpol[j*ld] = 1.0;
898 }
899 /* ppol is the initial p-polynomial (corresponding to the A-conjugate vector p in CG)
900 rpol is the r-polynomial (corresponding to the residual vector r in CG)
901 cpol is the "corrected" residual polynomial */
902 3248 sappol = 3;
903 3248 sarpol = 3;
904
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
3248 PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(ppol,sppol,ld,intv,nintv,appol,ld));
905
4/4
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
61712 for (i=0;i<3;i++) for (j=0;j<nintv;j++) arpol[i+j*ld] = appol[i+j*ld];
906 3248 rho00 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
907 3248 rho = rho00;
908
909 /* corrected CR in vector space */
910 /* we assume x0 is always 0 */
911
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
3248 PetscCall(VecSet(x,0.0));
912
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
3248 PetscCall(VecCopy(b,r)); /* initial residual r = b-A*x0 */
913
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
3248 PetscCall(VecCopy(r,p)); /* p = r */
914
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
3248 PetscCall(MatMult(A,p,ap)); /* ap = A*p */
915
916
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
484848 for (i=0;i<niter;i++) {
917 /* iteration in the polynomial space */
918 481600 den = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(appol,sappol,nintv,ld,appol,sappol,nintv,ld,intervalWeights);
919 481600 alp0 = rho/den;
920 481600 rho1 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(baseFilter,nbase,nintv,nbase,appol,sappol,nintv,ld,intervalWeights);
921 481600 alp = (rho-rho1)/den;
922 481600 srpol++;
923 481600 scpol++;
924
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
481600 PetscCall(Mat_AXPY_BLAS(srpol,nintv,-alp0,appol,ld,1.0,rpol,ld));
925
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
481600 PetscCall(Mat_AXPY_BLAS(scpol,nintv,-alp,appol,ld,1.0,cpol,ld));
926 481600 sarpol++;
927
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
481600 PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(rpol,srpol,ld,intv,nintv,arpol,ld));
928 481600 rho0 = rho;
929 481600 rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
930
931 /* update x in the vector space */
932 481600 alpha = alp;
933
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
481600 PetscCall(VecAXPY(x,alpha,p)); /* x += alp*p */
934
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
481600 if (rho < tol*rho00) break;
935
936 /* finish the iteration in the polynomial space */
937 481600 bet = rho / rho0;
938 481600 sppol++;
939 481600 sappol++;
940
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
481600 PetscCall(Mat_AXPY_BLAS(sppol,nintv,1.0,rpol,ld,bet,ppol,ld));
941
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
481600 PetscCall(Mat_AXPY_BLAS(sappol,nintv,1.0,arpol,ld,bet,appol,ld));
942
943 /* finish the iteration in the vector space */
944 481600 alpha = -alp0;
945
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
481600 PetscCall(VecAXPY(r,alpha,ap)); /* r -= alp0*ap */
946 481600 alpha = bet;
947
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
481600 PetscCall(VecAYPX(p,alpha,r)); /* p = r + bet*p */
948
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
481600 PetscCall(MatMult(A,r,w)); /* ap = A*r + bet*ap */
949
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
481600 PetscCall(VecAYPX(ap,alpha,w));
950 }
951
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
3248 PetscCall(PetscFree5(ppol,rpol,cpol,appol,arpol));
952
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
582 PetscFunctionReturn(PETSC_SUCCESS);
953 }
954
955 /*
956 Gateway to FILTLAN for evaluating y=p(A)*x
957 */
958 3248 static PetscErrorCode MatMult_FILTLAN(Mat A,Vec x,Vec y)
959 {
960 3248 ST st;
961 3248 ST_FILTER *ctx;
962 3248 FILTLAN_CTX filtlan;
963 3248 PetscInt npoints;
964
965
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
3248 PetscFunctionBegin;
966
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
3248 PetscCall(MatShellGetContext(A,&st));
967 3248 ctx = (ST_FILTER*)st->data;
968 3248 filtlan = (FILTLAN_CTX)ctx->data;
969
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
3248 npoints = (filtlan->filterInfo->filterType == 2)? 6: 4;
970
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
3248 PetscCall(FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProduct(ctx->T,x,y,filtlan->baseFilter,2*filtlan->baseDegree+2,filtlan->intervals,npoints-1,filtlan->opts->intervalWeights,ctx->polyDegree,st->work));
971
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
3248 PetscCall(VecCopy(y,st->work[0]));
972
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
3248 PetscCall(MatMult(ctx->T,st->work[0],y));
973
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
582 PetscFunctionReturn(PETSC_SUCCESS);
974 }
975
976 /* Block version of FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProduct */
977 371 static PetscErrorCode FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProductBlock(Mat A,Mat B,Mat C,PetscReal *baseFilter,PetscInt nbase,PetscReal *intv,PetscInt nintv,PetscReal *intervalWeights,PetscInt niter,Vec *work,Mat R,Mat P,Mat AP,Mat W)
978 {
979 371 PetscInt i,j,srpol,scpol,sarpol,sppol,sappol,ld;
980 371 PetscReal rho,rho0,rho00,rho1,den,bet,alp,alp0,*cpol,*ppol,*rpol,*appol,*arpol,tol=0.0;
981 371 PetscScalar alpha;
982
983
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
371 PetscFunctionBegin;
984 371 ld = niter+3; /* leading dimension */
985
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
371 PetscCall(PetscCalloc5(ld*nintv,&ppol,ld*nintv,&rpol,ld*nintv,&cpol,ld*nintv,&appol,ld*nintv,&arpol));
986 /* initialize polynomial ppol to be 1 (i.e. multiplicative identity) in all intervals */
987 2226 sppol = 2;
988 2226 srpol = 2;
989 2226 scpol = 2;
990
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
2226 for (j=0;j<nintv;j++) {
991 1855 ppol[j*ld] = 1.0;
992 1855 rpol[j*ld] = 1.0;
993 1855 cpol[j*ld] = 1.0;
994 }
995 /* ppol is the initial p-polynomial (corresponding to the A-conjugate vector p in CG)
996 rpol is the r-polynomial (corresponding to the residual vector r in CG)
997 cpol is the "corrected" residual polynomial */
998 371 sappol = 3;
999 371 sarpol = 3;
1000
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
371 PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(ppol,sppol,ld,intv,nintv,appol,ld));
1001
4/4
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
7049 for (i=0;i<3;i++) for (j=0;j<nintv;j++) arpol[i+j*ld] = appol[i+j*ld];
1002 371 rho00 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
1003 371 rho = rho00;
1004
1005 /* corrected CR in vector space */
1006 /* we assume x0 is always 0 */
1007
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
371 PetscCall(MatZeroEntries(C));
1008
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
371 PetscCall(MatCopy(B,R,SAME_NONZERO_PATTERN)); /* initial residual r = b-A*x0 */
1009
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
371 PetscCall(MatCopy(R,P,SAME_NONZERO_PATTERN)); /* p = r */
1010
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
371 PetscCall(MatMatMult(A,P,MAT_REUSE_MATRIX,PETSC_DEFAULT,&AP)); /* ap = A*p */
1011
1012
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
74571 for (i=0;i<niter;i++) {
1013 /* iteration in the polynomial space */
1014 74200 den = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(appol,sappol,nintv,ld,appol,sappol,nintv,ld,intervalWeights);
1015 74200 alp0 = rho/den;
1016 74200 rho1 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(baseFilter,nbase,nintv,nbase,appol,sappol,nintv,ld,intervalWeights);
1017 74200 alp = (rho-rho1)/den;
1018 74200 srpol++;
1019 74200 scpol++;
1020
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
74200 PetscCall(Mat_AXPY_BLAS(srpol,nintv,-alp0,appol,ld,1.0,rpol,ld));
1021
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
74200 PetscCall(Mat_AXPY_BLAS(scpol,nintv,-alp,appol,ld,1.0,cpol,ld));
1022 74200 sarpol++;
1023
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
74200 PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(rpol,srpol,ld,intv,nintv,arpol,ld));
1024 74200 rho0 = rho;
1025 74200 rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
1026
1027 /* update x in the vector space */
1028 74200 alpha = alp;
1029
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
74200 PetscCall(MatAXPY(C,alpha,P,SAME_NONZERO_PATTERN)); /* x += alp*p */
1030
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
74200 if (rho < tol*rho00) break;
1031
1032 /* finish the iteration in the polynomial space */
1033 74200 bet = rho / rho0;
1034 74200 sppol++;
1035 74200 sappol++;
1036
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
74200 PetscCall(Mat_AXPY_BLAS(sppol,nintv,1.0,rpol,ld,bet,ppol,ld));
1037
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
74200 PetscCall(Mat_AXPY_BLAS(sappol,nintv,1.0,arpol,ld,bet,appol,ld));
1038
1039 /* finish the iteration in the vector space */
1040 74200 alpha = -alp0;
1041
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
74200 PetscCall(MatAXPY(R,alpha,AP,SAME_NONZERO_PATTERN)); /* r -= alp0*ap */
1042 74200 alpha = bet;
1043
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
74200 PetscCall(MatAYPX(P,alpha,R,SAME_NONZERO_PATTERN)); /* p = r + bet*p */
1044
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
74200 PetscCall(MatMatMult(A,R,MAT_REUSE_MATRIX,PETSC_DEFAULT,&W)); /* ap = A*r + bet*ap */
1045
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
74200 PetscCall(MatAYPX(AP,alpha,W,SAME_NONZERO_PATTERN));
1046 }
1047
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
371 PetscCall(PetscFree5(ppol,rpol,cpol,appol,arpol));
1048
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
53 PetscFunctionReturn(PETSC_SUCCESS);
1049 }
1050
1051 /*
1052 Gateway to FILTLAN for evaluating C=p(A)*B
1053 */
1054 371 static PetscErrorCode MatMatMult_FILTLAN(Mat A,Mat B,Mat C,void *pctx)
1055 {
1056 371 ST st;
1057 371 ST_FILTER *ctx;
1058 371 FILTLAN_CTX filtlan;
1059 371 PetscInt i,m1,m2,npoints;
1060
1061
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
371 PetscFunctionBegin;
1062
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
371 PetscCall(MatShellGetContext(A,&st));
1063 371 ctx = (ST_FILTER*)st->data;
1064 371 filtlan = (FILTLAN_CTX)ctx->data;
1065
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
371 npoints = (filtlan->filterInfo->filterType == 2)? 6: 4;
1066
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
371 if (ctx->nW) { /* check if work matrices must be resized */
1067
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
357 PetscCall(MatGetSize(B,NULL,&m1));
1068
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
357 PetscCall(MatGetSize(ctx->W[0],NULL,&m2));
1069
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
357 if (m1!=m2) {
1070
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
56 PetscCall(MatDestroyMatrices(ctx->nW,&ctx->W));
1071 56 ctx->nW = 0;
1072 }
1073 }
1074
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
371 if (!ctx->nW) { /* allocate work matrices */
1075 70 ctx->nW = 4;
1076
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
70 PetscCall(PetscMalloc1(ctx->nW,&ctx->W));
1077
7/8
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 8 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 2 times.
350 for (i=0;i<ctx->nW;i++) PetscCall(MatDuplicate(B,MAT_DO_NOT_COPY_VALUES,&ctx->W[i]));
1078 }
1079
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
371 PetscCall(FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProductBlock(ctx->T,B,ctx->W[0],filtlan->baseFilter,2*filtlan->baseDegree+2,filtlan->intervals,npoints-1,filtlan->opts->intervalWeights,ctx->polyDegree,st->work,C,ctx->W[1],ctx->W[2],ctx->W[3]));
1080
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
371 PetscCall(MatMatMult(ctx->T,ctx->W[0],MAT_REUSE_MATRIX,PETSC_DEFAULT,&C));
1081
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
53 PetscFunctionReturn(PETSC_SUCCESS);
1082 }
1083
1084 /*
1085 FILTLAN function PolynomialFilterInterface::setFilter
1086
1087 Creates the shifted (and scaled) matrix and the base filter P(z).
1088 M is a shell matrix whose MatMult() applies the filter.
1089 */
1090 88 static PetscErrorCode STComputeOperator_Filter_FILTLAN(ST st,Mat *G)
1091 {
1092 88 ST_FILTER *ctx = (ST_FILTER*)st->data;
1093 88 FILTLAN_CTX filtlan = (FILTLAN_CTX)ctx->data;
1094 88 PetscInt i,npoints,n,m,N,M;
1095 88 PetscReal frame2[4];
1096 88 PetscScalar alpha;
1097 88 const PetscInt HighLowFlags[5] = { 1, -1, 0, -1, 1 };
1098
1099
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
88 PetscFunctionBegin;
1100
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
88 PetscCall(STSetWorkVecs(st,4));
1101 88 filtlan->frame[0] = ctx->left;
1102 88 filtlan->frame[1] = ctx->inta;
1103 88 filtlan->frame[2] = ctx->intb;
1104 88 filtlan->frame[3] = ctx->right;
1105
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
88 if (filtlan->frame[0] == filtlan->frame[1]) { /* low pass filter, convert it to high pass filter */
1106 /* T = frame[3]*eye(n) - A */
1107 PetscCall(MatDestroy(&ctx->T));
1108 PetscCall(MatDuplicate(st->A[0],MAT_COPY_VALUES,&ctx->T));
1109 PetscCall(MatScale(ctx->T,-1.0));
1110 alpha = filtlan->frame[3];
1111 PetscCall(MatShift(ctx->T,alpha));
1112 for (i=0;i<4;i++) frame2[i] = filtlan->frame[3] - filtlan->frame[3-i];
1113 } else { /* it can be a mid-pass filter or a high-pass filter */
1114
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
88 if (filtlan->frame[0] == 0.0) {
1115 PetscCall(PetscObjectReference((PetscObject)st->A[0]));
1116 PetscCall(MatDestroy(&ctx->T));
1117 ctx->T = st->A[0];
1118 for (i=0;i<4;i++) frame2[i] = filtlan->frame[i];
1119 } else {
1120 /* T = A - frame[0]*eye(n) */
1121
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
88 PetscCall(MatDestroy(&ctx->T));
1122
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
88 PetscCall(MatDuplicate(st->A[0],MAT_COPY_VALUES,&ctx->T));
1123 88 alpha = -filtlan->frame[0];
1124
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
88 PetscCall(MatShift(ctx->T,alpha));
1125
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
440 for (i=0;i<4;i++) frame2[i] = filtlan->frame[i] - filtlan->frame[0];
1126 }
1127 }
1128
1129 /* no need to recompute filter if the parameters did not change */
1130
3/4
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 10 times.
88 if (st->state==ST_STATE_INITIAL || ctx->filtch) {
1131
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
78 PetscCall(FILTLAN_GetIntervals(filtlan->intervals,frame2,ctx->polyDegree,filtlan->baseDegree,filtlan->opts,filtlan->filterInfo));
1132 /* translate the intervals back */
1133
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
78 if (filtlan->frame[0] == filtlan->frame[1]) { /* low pass filter, convert it to high pass filter */
1134 for (i=0;i<4;i++) filtlan->intervals2[i] = filtlan->frame[3] - filtlan->intervals[3-i];
1135 } else { /* it can be a mid-pass filter or a high-pass filter */
1136
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
78 if (filtlan->frame[0] == 0.0) {
1137 for (i=0;i<6;i++) filtlan->intervals2[i] = filtlan->intervals[i];
1138 } else {
1139
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
546 for (i=0;i<6;i++) filtlan->intervals2[i] = filtlan->intervals[i] + filtlan->frame[0];
1140 }
1141 }
1142
1143
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
78 npoints = (filtlan->filterInfo->filterType == 2)? 6: 4;
1144
5/8
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
78 PetscCall(PetscFree(filtlan->baseFilter));
1145
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
78 PetscCall(PetscMalloc1((2*filtlan->baseDegree+2)*(npoints-1),&filtlan->baseFilter));
1146
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
78 PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(filtlan->baseFilter,filtlan->intervals,npoints,HighLowFlags,filtlan->baseDegree));
1147
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
78 PetscCall(PetscInfo(st,"Computed value of yLimit = %g\n",(double)filtlan->filterInfo->yLimit));
1148 }
1149 88 ctx->filtch = PETSC_FALSE;
1150
1151 /* create shell matrix*/
1152
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
88 if (!*G) {
1153
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
78 PetscCall(MatGetSize(ctx->T,&N,&M));
1154
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
78 PetscCall(MatGetLocalSize(ctx->T,&n,&m));
1155
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
78 PetscCall(MatCreateShell(PetscObjectComm((PetscObject)st),n,m,N,M,st,G));
1156
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
78 PetscCall(MatShellSetOperation(*G,MATOP_MULT,(PetscErrorCodeFn*)MatMult_FILTLAN));
1157
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
78 PetscCall(MatShellSetMatProductOperation(*G,MATPRODUCT_AB,NULL,MatMatMult_FILTLAN,NULL,MATDENSE,MATDENSE));
1158
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
78 PetscCall(MatShellSetMatProductOperation(*G,MATPRODUCT_AB,NULL,MatMatMult_FILTLAN,NULL,MATDENSECUDA,MATDENSECUDA));
1159
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
78 PetscCall(MatShellSetMatProductOperation(*G,MATPRODUCT_AB,NULL,MatMatMult_FILTLAN,NULL,MATDENSEHIP,MATDENSEHIP));
1160 }
1161
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
16 PetscFunctionReturn(PETSC_SUCCESS);
1162 }
1163
1164 98 static PetscErrorCode STFilterGetThreshold_Filter_FILTLAN(ST st,PetscReal *gamma)
1165 {
1166 98 ST_FILTER *ctx = (ST_FILTER*)st->data;
1167 98 FILTLAN_CTX filtlan = (FILTLAN_CTX)ctx->data;
1168
1169
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
98 PetscFunctionBegin;
1170 98 *gamma = filtlan->filterInfo->yLimit;
1171
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
98 PetscFunctionReturn(PETSC_SUCCESS);
1172 }
1173
1174 68 static PetscErrorCode STDestroy_Filter_FILTLAN(ST st)
1175 {
1176 68 ST_FILTER *ctx = (ST_FILTER*)st->data;
1177 68 FILTLAN_CTX filtlan = (FILTLAN_CTX)ctx->data;
1178
1179
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
68 PetscFunctionBegin;
1180
5/8
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
68 PetscCall(PetscFree(filtlan->opts));
1181
5/8
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
68 PetscCall(PetscFree(filtlan->filterInfo));
1182
5/8
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
68 PetscCall(PetscFree(filtlan->baseFilter));
1183
6/8
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
68 PetscCall(PetscFree(filtlan));
1184
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
12 PetscFunctionReturn(PETSC_SUCCESS);
1185 }
1186
1187 68 PetscErrorCode STCreate_Filter_FILTLAN(ST st)
1188 {
1189 68 ST_FILTER *ctx = (ST_FILTER*)st->data;
1190 68 FILTLAN_CTX filtlan;
1191 68 FILTLAN_IOP iop;
1192 68 FILTLAN_PFI pfi;
1193
1194
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
68 PetscFunctionBegin;
1195
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
68 PetscCall(PetscNew(&filtlan));
1196 68 ctx->data = (void*)filtlan;
1197 68 filtlan->baseDegree = 10;
1198
1199
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
68 PetscCall(PetscNew(&iop));
1200 68 filtlan->opts = iop;
1201 68 iop->intervalWeights[0] = 100.0;
1202 68 iop->intervalWeights[1] = 1.0;
1203 68 iop->intervalWeights[2] = 1.0;
1204 68 iop->intervalWeights[3] = 1.0;
1205 68 iop->intervalWeights[4] = 100.0;
1206 68 iop->transIntervalRatio = 0.6;
1207 68 iop->reverseInterval = PETSC_FALSE;
1208 68 iop->initialPlateau = 0.1;
1209 68 iop->plateauShrinkRate = 1.5;
1210 68 iop->initialShiftStep = 0.01;
1211 68 iop->shiftStepExpanRate = 1.5;
1212 68 iop->maxInnerIter = 30;
1213 68 iop->yLimitTol = 0.0001;
1214 68 iop->maxOuterIter = 50;
1215 68 iop->numGridPoints = 200;
1216 68 iop->yBottomLine = 0.001;
1217 68 iop->yRippleLimit = 0.8;
1218
1219
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
68 PetscCall(PetscNew(&pfi));
1220 68 filtlan->filterInfo = pfi;
1221
1222 68 ctx->computeoperator = STComputeOperator_Filter_FILTLAN;
1223 68 ctx->getthreshold = STFilterGetThreshold_Filter_FILTLAN;
1224 68 ctx->destroy = STDestroy_Filter_FILTLAN;
1225
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
68 PetscFunctionReturn(PETSC_SUCCESS);
1226 }
1227