Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
3 | SLEPc - Scalable Library for Eigenvalue Problem Computations | ||
4 | Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain | ||
5 | |||
6 | This file is part of SLEPc. | ||
7 | SLEPc is distributed under a 2-clause BSD license (see LICENSE). | ||
8 | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
9 | */ | ||
10 | /* | ||
11 | This file is an adaptation of several subroutines from FILTLAN, the | ||
12 | Filtered Lanczos Package, authored by Haw-ren Fang and Yousef Saad. | ||
13 | |||
14 | More information at: | ||
15 | https://www-users.cs.umn.edu/~saad/software/filtlan | ||
16 | |||
17 | References: | ||
18 | |||
19 | [1] H. Fang and Y. Saad, "A filtered Lanczos procedure for extreme and interior | ||
20 | eigenvalue problems", SIAM J. Sci. Comput. 34(4):A2220-A2246, 2012. | ||
21 | */ | ||
22 | |||
23 | #include <slepc/private/stimpl.h> | ||
24 | #include "filter.h" | ||
25 | |||
26 | static PetscErrorCode FILTLAN_FilteredConjugateResidualPolynomial(PetscReal*,PetscReal*,PetscInt,PetscReal*,PetscInt,PetscReal*,PetscInt); | ||
27 | static PetscReal FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(PetscReal*,PetscInt,PetscReal*,PetscInt,PetscReal); | ||
28 | static PetscErrorCode FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(PetscInt,PetscReal,PetscReal,PetscReal*,PetscReal*,PetscReal*,PetscReal*); | ||
29 | |||
30 | /* //////////////////////////////////////////////////////////////////////////// | ||
31 | // Newton - Hermite Polynomial Interpolation | ||
32 | //////////////////////////////////////////////////////////////////////////// */ | ||
33 | |||
34 | /* | ||
35 | FILTLAN function NewtonPolynomial | ||
36 | |||
37 | build P(z) by Newton's divided differences in the form | ||
38 | P(z) = a(1) + a(2)*(z-x(1)) + a(3)*(z-x(1))*(z-x(2)) + ... + a(n)*(z-x(1))*...*(z-x(n-1)), | ||
39 | such that P(x(i)) = y(i) for i=1,...,n, where | ||
40 | x,y are input vectors of length n, and a is the output vector of length n | ||
41 | if x(i)==x(j) for some i!=j, then it is assumed that the derivative of P(z) is to be zero at x(i), | ||
42 | and the Hermite polynomial interpolation is applied | ||
43 | in general, if there are k x(i)'s with the same value x0, then | ||
44 | the j-th order derivative of P(z) is zero at z=x0 for j=1,...,k-1 | ||
45 | */ | ||
46 | 19496 | static inline PetscErrorCode FILTLAN_NewtonPolynomial(PetscInt n,PetscReal *x,PetscReal *y,PetscReal *sa,PetscReal *sf) | |
47 | { | ||
48 | 19496 | PetscReal d,*sx=x,*sy=y; | |
49 | 19496 | PetscInt j,k; | |
50 | |||
51 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
19496 | PetscFunctionBegin; |
52 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
19496 | PetscCall(PetscArraycpy(sf,sy,n)); |
53 | |||
54 | /* apply Newton's finite difference method */ | ||
55 | 19496 | sa[0] = sf[0]; | |
56 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
428912 | for (j=1;j<n;j++) { |
57 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
4912992 | for (k=n-1;k>=j;k--) { |
58 | 4503576 | d = sx[k]-sx[k-j]; | |
59 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
4503576 | if (PetscUnlikely(d == 0.0)) sf[k] = 0.0; /* assume that the derivative is 0.0 and apply the Hermite interpolation */ |
60 | 2167836 | else sf[k] = (sf[k]-sf[k-1]) / d; | |
61 | } | ||
62 | 409416 | sa[j] = sf[j]; | |
63 | } | ||
64 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
3544 | PetscFunctionReturn(PETSC_SUCCESS); |
65 | } | ||
66 | |||
67 | /* | ||
68 | FILTLAN function HermiteBaseFilterInChebyshevBasis | ||
69 | |||
70 | compute a base filter P(z) which is a continuous, piecewise polynomial P(z) expanded | ||
71 | in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials in each interval | ||
72 | |||
73 | The base filter P(z) equals P_j(z) for z in the j-th interval [intv(j), intv(j+1)), where | ||
74 | P_j(z) a Hermite interpolating polynomial | ||
75 | |||
76 | input: | ||
77 | intv is a vector which defines the intervals; the j-th interval is [intv(j), intv(j+1)) | ||
78 | HiLowFlags determines the shape of the base filter P(z) | ||
79 | Consider the j-th interval [intv(j), intv(j+1)] | ||
80 | HighLowFlag[j-1]==1, P(z)==1 for z in [intv(j), intv(j+1)] | ||
81 | ==0, P(z)==0 for z in [intv(j), intv(j+1)] | ||
82 | ==-1, [intv(j), intv(j+1)] is a transition interval; | ||
83 | P(intv(j)) and P(intv(j+1)) are defined such that P(z) is continuous | ||
84 | baseDeg is the degree of smoothness of the Hermite (piecewise polynomial) interpolation | ||
85 | to be precise, the i-th derivative of P(z) is zero, i.e. d^{i}P(z)/dz^i==0, at all interval | ||
86 | end points z=intv(j) for i=1,...,baseDeg | ||
87 | |||
88 | output: | ||
89 | P(z) expanded in a basis of `translated' (scale-and-shift) Chebyshev polynomials | ||
90 | to be precise, for z in the j-th interval [intv(j),intv(j+1)), P(z) equals | ||
91 | P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z), | ||
92 | where S_i(z) is the `translated' Chebyshev polynomial in that interval, | ||
93 | S_i((z-c)/h) = T_i(z), c = (intv(j)+intv(j+1))) / 2, h = (intv(j+1)-intv(j)) / 2, | ||
94 | with T_i(z) the Chebyshev polynomial of the first kind, | ||
95 | T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2 | ||
96 | the return matrix is the matrix of Chebyshev coefficients pp just described | ||
97 | |||
98 | note that the degree of P(z) in each interval is (at most) 2*baseDeg+1, with 2*baseDeg+2 coefficients | ||
99 | let n be the length of intv; then there are n-1 intervals | ||
100 | therefore the return matrix pp is of size (2*baseDeg+2)-by-(n-1) | ||
101 | */ | ||
102 | 9748 | static PetscErrorCode FILTLAN_HermiteBaseFilterInChebyshevBasis(PetscReal *baseFilter,PetscReal *intv,PetscInt npoints,const PetscInt *HighLowFlags,PetscInt baseDeg) | |
103 | { | ||
104 | 9748 | PetscInt m,ii,jj; | |
105 | 9748 | PetscReal flag,flag0,flag2,aa,bb,*px,*py,*sx,*sy,*pp,*qq,*sq,*sbf,*work,*currentPoint = intv; | |
106 | 9748 | const PetscInt *hilo = HighLowFlags; | |
107 | |||
108 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
9748 | PetscFunctionBegin; |
109 | 9748 | m = 2*baseDeg+2; | |
110 | 9748 | jj = npoints-1; /* jj is initialized as the number of intervals */ | |
111 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
9748 | PetscCall(PetscMalloc5(m,&px,m,&py,m,&pp,m,&qq,m,&work)); |
112 | sbf = baseFilter; | ||
113 | |||
114 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
58488 | while (jj--) { /* the main loop to compute the Chebyshev coefficients */ |
115 | |||
116 | 48740 | flag = (PetscReal)(*hilo++); /* get the flag of the current interval */ | |
117 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
48740 | if (flag == -1.0) { /* flag == -1 means that the current interval is a transition polynomial */ |
118 | |||
119 | 19496 | flag2 = (PetscReal)(*hilo); /* get flag2, the flag of the next interval */ | |
120 | 19496 | flag0 = 1.0-flag2; /* the flag of the previous interval is 1-flag2 */ | |
121 | |||
122 | /* two pointers for traversing x[] and y[] */ | ||
123 | 19496 | sx = px; | |
124 | 19496 | sy = py; | |
125 | |||
126 | /* find the current interval [aa,bb] */ | ||
127 | 19496 | aa = *currentPoint++; | |
128 | 19496 | bb = *currentPoint; | |
129 | |||
130 | /* now left-hand side */ | ||
131 | 19496 | ii = baseDeg+1; | |
132 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
233952 | while (ii--) { |
133 | 214456 | *sy++ = flag0; | |
134 | 214456 | *sx++ = aa; | |
135 | } | ||
136 | |||
137 | /* now right-hand side */ | ||
138 | ii = baseDeg+1; | ||
139 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
233952 | while (ii--) { |
140 | 214456 | *sy++ = flag2; | |
141 | 214456 | *sx++ = bb; | |
142 | } | ||
143 | |||
144 | /* build a Newton polynomial (indeed, the generalized Hermite interpolating polynomial) with x[] and y[] */ | ||
145 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
19496 | PetscCall(FILTLAN_NewtonPolynomial(m,px,py,pp,work)); |
146 | |||
147 | /* pp contains coefficients of the Newton polynomial P(z) in the current interval [aa,bb], where | ||
148 | P(z) = pp(1) + pp(2)*(z-px(1)) + pp(3)*(z-px(1))*(z-px(2)) + ... + pp(n)*(z-px(1))*...*(z-px(n-1)) */ | ||
149 | |||
150 | /* translate the Newton coefficients to the Chebyshev coefficients */ | ||
151 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
19496 | PetscCall(FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(m,aa,bb,pp,px,qq,work)); |
152 | /* qq contains coefficients of the polynomial in [aa,bb] in the `translated' Chebyshev basis */ | ||
153 | |||
154 | /* copy the Chebyshev coefficients to baseFilter | ||
155 | OCTAVE/MATLAB: B(:,j) = qq, where j = (npoints-1)-jj and B is the return matrix */ | ||
156 | 19496 | sq = qq; | |
157 | 19496 | ii = 2*baseDeg+2; | |
158 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
448408 | while (ii--) *sbf++ = *sq++; |
159 | |||
160 | } else { | ||
161 | |||
162 | /* a constant polynomial P(z)=flag, where either flag==0 or flag==1 | ||
163 | OCTAVE/MATLAB: B(1,j) = flag, where j = (npoints-1)-jj and B is the return matrix */ | ||
164 | 29244 | *sbf++ = flag; | |
165 | |||
166 | /* the other coefficients are all zero, since P(z) is a constant | ||
167 | OCTAVE/MATLAB: B(1,j) = 0, where j = (npoints-1)-jj and B is the return matrix */ | ||
168 | 29244 | ii = 2*baseDeg+1; | |
169 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
643368 | while (ii--) *sbf++ = 0.0; |
170 | |||
171 | /* for the next point */ | ||
172 | 29244 | currentPoint++; | |
173 | } | ||
174 | } | ||
175 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
9748 | PetscCall(PetscFree5(px,py,pp,qq,work)); |
176 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
1772 | PetscFunctionReturn(PETSC_SUCCESS); |
177 | } | ||
178 | |||
179 | /* //////////////////////////////////////////////////////////////////////////// | ||
180 | // Base Filter | ||
181 | //////////////////////////////////////////////////////////////////////////// */ | ||
182 | |||
183 | /* | ||
184 | FILTLAN function GetIntervals | ||
185 | |||
186 | this routine determines the intervals (including the transition one(s)) by an iterative process | ||
187 | |||
188 | frame is a vector consisting of 4 ordered elements: | ||
189 | [frame(1),frame(4)] is the interval which (tightly) contains all eigenvalues, and | ||
190 | [frame(2),frame(3)] is the interval in which the eigenvalues are sought | ||
191 | baseDeg is the left-and-right degree of the base filter for each interval | ||
192 | polyDeg is the (maximum possible) degree of s(z), with z*s(z) the polynomial filter | ||
193 | intv is the output; the j-th interval is [intv(j),intv(j+1)) | ||
194 | opts is a collection of interval options | ||
195 | |||
196 | the base filter P(z) is a piecewise polynomial from Hermite interpolation with degree baseDeg | ||
197 | at each end point of intervals | ||
198 | |||
199 | the polynomial filter Q(z) is in the form z*s(z), i.e. Q(0)==0, such that ||(1-P(z))-z*s(z)||_w is | ||
200 | minimized with s(z) a polynomial of degree up to polyDeg | ||
201 | |||
202 | the resulting polynomial filter Q(z) satisfies Q(x)>=Q(y) for x in [frame[1],frame[2]], and | ||
203 | y in [frame[0],frame[3]] but not in [frame[1],frame[2]] | ||
204 | |||
205 | the routine fills a PolynomialFilterInfo struct which gives some information of the polynomial filter | ||
206 | */ | ||
207 | 78 | static PetscErrorCode FILTLAN_GetIntervals(PetscReal *intervals,PetscReal *frame,PetscInt polyDeg,PetscInt baseDeg,FILTLAN_IOP opts,FILTLAN_PFI filterInfo) | |
208 | { | ||
209 | 78 | PetscReal intv[6],x,y,z1,z2,c,c1,c2,fc,fc2,halfPlateau,leftDelta,rightDelta,gridSize; | |
210 | 78 | PetscReal yLimit,ySummit,yLeftLimit,yRightLimit,bottom,qIndex,*baseFilter,*polyFilter; | |
211 | 78 | PetscReal yLimitGap=0.0,yLeftSummit=0.0,yLeftBottom=0.0,yRightSummit=0.0,yRightBottom=0.0; | |
212 | 78 | PetscInt i,ii,npoints,numIter,numLeftSteps=1,numRightSteps=1,numMoreLooked=0; | |
213 | 78 | PetscBool leftBoundaryMet=PETSC_FALSE,rightBoundaryMet=PETSC_FALSE,stepLeft,stepRight; | |
214 | 78 | const PetscReal a=frame[0],a1=frame[1],b1=frame[2],b=frame[3]; | |
215 | 78 | const PetscInt HighLowFlags[5] = { 1, -1, 0, -1, 1 }; /* if filterType is 1, only first 3 elements will be used */ | |
216 | 78 | const PetscInt numLookMore = 2*(PetscInt)(0.5+(PetscLogReal(2.0)/PetscLogReal(opts->shiftStepExpanRate))); | |
217 | |||
218 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
78 | PetscFunctionBegin; |
219 |
3/8✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 10 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 10 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
|
78 | PetscCheck(a<=a1 && a1<=b1 && b1<=b,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Values in the frame vector should be non-decreasing"); |
220 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
78 | PetscCheck(a1!=b1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"The range of wanted eigenvalues cannot be of size zero"); |
221 | 78 | filterInfo->filterType = 2; /* mid-pass filter, for interior eigenvalues */ | |
222 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
|
78 | if (b == b1) { |
223 | ✗ | PetscCheck(a!=a1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"A polynomial filter should not cover all eigenvalues"); | |
224 | ✗ | filterInfo->filterType = 1; /* high-pass filter, for largest eigenvalues */ | |
225 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
78 | } else PetscCheck(a!=a1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"filterType==3 for smallest eigenvalues should be pre-converted to filterType==1 for largest eigenvalues"); |
226 | |||
227 | /* the following recipe follows Yousef Saad (2005, 2006) with a few minor adaptations / enhancements */ | ||
228 | 78 | halfPlateau = 0.5*(b1-a1)*opts->initialPlateau; /* half length of the "plateau" of the (dual) base filter */ | |
229 | 78 | leftDelta = (b1-a1)*opts->initialShiftStep; /* initial left shift */ | |
230 | 78 | rightDelta = leftDelta; /* initial right shift */ | |
231 | 78 | opts->numGridPoints = PetscMax(opts->numGridPoints,(PetscInt)(2.0*(b-a)/halfPlateau)); | |
232 | 78 | gridSize = (b-a) / (PetscReal)opts->numGridPoints; | |
233 | |||
234 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
546 | for (i=0;i<6;i++) intv[i] = 0.0; |
235 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
78 | if (filterInfo->filterType == 2) { /* for interior eigenvalues */ |
236 | 78 | npoints = 6; | |
237 | 78 | intv[0] = a; | |
238 | 78 | intv[5] = b; | |
239 | /* intv[1], intv[2], intv[3], and intv[4] to be determined */ | ||
240 | } else { /* filterType == 1 (or 3 with conversion), for extreme eigenvalues */ | ||
241 | ✗ | npoints = 4; | |
242 | ✗ | intv[0] = a; | |
243 | ✗ | intv[3] = b; | |
244 | /* intv[1], and intv[2] to be determined */ | ||
245 | } | ||
246 | 78 | z1 = a1 - leftDelta; | |
247 | 78 | z2 = b1 + rightDelta; | |
248 | 78 | filterInfo->filterOK = 0; /* not yet found any OK filter */ | |
249 | |||
250 | /* allocate matrices */ | ||
251 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
78 | PetscCall(PetscMalloc2((polyDeg+2)*(npoints-1),&polyFilter,(2*baseDeg+2)*(npoints-1),&baseFilter)); |
252 | |||
253 | /* initialize the intervals, mainly for the case opts->maxOuterIter == 0 */ | ||
254 | 78 | intervals[0] = intv[0]; | |
255 | 78 | intervals[3] = intv[3]; | |
256 | 78 | intervals[5] = intv[5]; | |
257 | 78 | intervals[1] = z1; | |
258 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
78 | if (filterInfo->filterType == 2) { /* a mid-pass filter for interior eigenvalues */ |
259 | 78 | intervals[4] = z2; | |
260 | 78 | c = (a1+b1) / 2.0; | |
261 | 78 | intervals[2] = c - halfPlateau; | |
262 | 78 | intervals[3] = c + halfPlateau; | |
263 | } else { /* filterType == 1 (or 3 with conversion) for extreme eigenvalues */ | ||
264 | ✗ | intervals[2] = z1 + (b1-z1)*opts->transIntervalRatio; | |
265 | } | ||
266 | |||
267 | /* the main loop */ | ||
268 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
790 | for (numIter=1;numIter<=opts->maxOuterIter;numIter++) { |
269 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
790 | if (z1 <= a) { /* outer loop updates z1 and z2 */ |
270 | 180 | z1 = a; | |
271 | 180 | leftBoundaryMet = PETSC_TRUE; | |
272 | } | ||
273 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
790 | if (filterInfo->filterType == 2) { /* a <= z1 < (a1) */ |
274 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
|
790 | if (z2 >= b) { /* a mid-pass filter for interior eigenvalues */ |
275 | ✗ | z2 = b; | |
276 | ✗ | rightBoundaryMet = PETSC_TRUE; | |
277 | } | ||
278 | /* a <= z1 < c-h < c+h < z2 <= b, where h is halfPlateau */ | ||
279 | /* [z1, c-h] and [c+h, z2] are transition interval */ | ||
280 | 790 | intv[1] = z1; | |
281 | 790 | intv[4] = z2; | |
282 | 790 | c1 = z1 + halfPlateau; | |
283 | 790 | intv[2] = z1; /* i.e. c1 - halfPlateau */ | |
284 | 790 | intv[3] = c1 + halfPlateau; | |
285 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
790 | PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg)); |
286 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
790 | PetscCall(FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg)); |
287 | /* fc1 = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1); */ | ||
288 | 790 | c2 = z2 - halfPlateau; | |
289 | 790 | intv[2] = c2 - halfPlateau; | |
290 | 790 | intv[3] = z2; /* i.e. c2 + halfPlateau */ | |
291 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
790 | PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg)); |
292 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
790 | PetscCall(FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg)); |
293 | 790 | fc2 = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1); | |
294 | 790 | yLimitGap = PETSC_MAX_REAL; | |
295 | 790 | ii = opts->maxInnerIter; | |
296 |
3/4✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
|
8880 | while (ii-- && !(yLimitGap <= opts->yLimitTol)) { |
297 | /* recursive bisection to get c such that p(a1) are p(b1) approximately the same */ | ||
298 | 8090 | c = (c1+c2) / 2.0; | |
299 | 8090 | intv[2] = c - halfPlateau; | |
300 | 8090 | intv[3] = c + halfPlateau; | |
301 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
8090 | PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg)); |
302 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
8090 | PetscCall(FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg)); |
303 | 8090 | fc = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1); | |
304 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
8090 | if (fc*fc2 < 0.0) { |
305 | c1 = c; | ||
306 | /* fc1 = fc; */ | ||
307 | } else { | ||
308 | 4447 | c2 = c; | |
309 | 4447 | fc2 = fc; | |
310 | } | ||
311 | 8090 | yLimitGap = PetscAbsReal(fc); | |
312 | } | ||
313 | } else { /* filterType == 1 (or 3 with conversion) for extreme eigenvalues */ | ||
314 | ✗ | intv[1] = z1; | |
315 | ✗ | intv[2] = z1 + (b1-z1)*opts->transIntervalRatio; | |
316 | ✗ | PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,4,HighLowFlags,baseDeg)); | |
317 | ✗ | PetscCall(FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,4,opts->intervalWeights,polyDeg)); | |
318 | } | ||
319 | /* polyFilter contains the coefficients of the polynomial filter which approximates phi(x) | ||
320 | expanded in the `translated' Chebyshev basis */ | ||
321 | /* psi(x) = 1.0 - phi(x) is the dual base filter approximated by a polynomial in the form x*p(x) */ | ||
322 | 790 | yLeftLimit = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1); | |
323 | 790 | yRightLimit = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1); | |
324 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
790 | yLimit = (yLeftLimit < yRightLimit) ? yLeftLimit : yRightLimit; |
325 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
790 | ySummit = (yLeftLimit > yRightLimit) ? yLeftLimit : yRightLimit; |
326 | x = a1; | ||
327 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
31600 | while ((x+=gridSize) < b1) { |
328 | 30810 | y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x); | |
329 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
30810 | if (y < yLimit) yLimit = y; |
330 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
30810 | if (y > ySummit) ySummit = y; |
331 | } | ||
332 | /* now yLimit is the minimum of x*p(x) for x in [a1, b1] */ | ||
333 | 790 | stepLeft = PETSC_FALSE; | |
334 | 790 | stepRight = PETSC_FALSE; | |
335 |
6/6✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
✓ Branch 4 taken 10 times.
✓ Branch 5 taken 10 times.
|
790 | if ((yLimit < yLeftLimit && yLimit < yRightLimit) || yLimit < opts->yBottomLine) { |
336 | /* very bad, step to see what will happen */ | ||
337 | 250 | stepLeft = PETSC_TRUE; | |
338 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
250 | if (filterInfo->filterType == 2) stepRight = PETSC_TRUE; |
339 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
540 | } else if (filterInfo->filterType == 2) { |
340 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
540 | if (yLeftLimit < yRightLimit) { |
341 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
|
302 | if (yRightLimit-yLeftLimit > opts->yLimitTol) stepLeft = PETSC_TRUE; |
342 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
|
238 | } else if (yLeftLimit-yRightLimit > opts->yLimitTol) stepRight = PETSC_TRUE; |
343 | } | ||
344 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
790 | if (!stepLeft) { |
345 | yLeftBottom = yLeftLimit; | ||
346 | x = a1; | ||
347 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
28144 | while ((x-=gridSize) >= a) { |
348 | 28094 | y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x); | |
349 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
28094 | if (y < yLeftBottom) yLeftBottom = y; |
350 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 2 times.
|
490 | else if (y > yLeftBottom) break; |
351 | } | ||
352 | yLeftSummit = yLeftBottom; | ||
353 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
1670316 | while ((x-=gridSize) >= a) { |
354 | 1669776 | y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x); | |
355 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
1669776 | if (y > yLeftSummit) { |
356 | 23866 | yLeftSummit = y; | |
357 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
23866 | if (yLeftSummit > yLimit*opts->yRippleLimit) { |
358 | stepLeft = PETSC_TRUE; | ||
359 | break; | ||
360 | } | ||
361 | } | ||
362 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
1669776 | if (y < yLeftBottom) yLeftBottom = y; |
363 | } | ||
364 | } | ||
365 |
3/4✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
|
790 | if (filterInfo->filterType == 2 && !stepRight) { |
366 | yRightBottom = yRightLimit; | ||
367 | x = b1; | ||
368 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
29748 | while ((x+=gridSize) <= b) { |
369 | 29748 | y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x); | |
370 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
29748 | if (y < yRightBottom) yRightBottom = y; |
371 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 2 times.
|
540 | else if (y > yRightBottom) break; |
372 | } | ||
373 | yRightSummit = yRightBottom; | ||
374 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
1328372 | while ((x+=gridSize) <= b) { |
375 | 1327832 | y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x); | |
376 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
1327832 | if (y > yRightSummit) { |
377 | 23970 | yRightSummit = y; | |
378 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
23970 | if (yRightSummit > yLimit*opts->yRippleLimit) { |
379 | stepRight = PETSC_TRUE; | ||
380 | break; | ||
381 | } | ||
382 | } | ||
383 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
1327832 | if (y < yRightBottom) yRightBottom = y; |
384 | } | ||
385 | } | ||
386 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
790 | if (!stepLeft && !stepRight) { |
387 |
3/4✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
|
540 | if (filterInfo->filterType == 2) bottom = PetscMin(yLeftBottom,yRightBottom); |
388 | else bottom = yLeftBottom; | ||
389 | 540 | qIndex = 1.0 - (yLimit-bottom) / (ySummit-bottom); | |
390 |
4/4✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
|
540 | if (filterInfo->filterOK == 0 || filterInfo->filterQualityIndex < qIndex) { |
391 | /* found the first OK filter or a better filter */ | ||
392 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
1596 | for (i=0;i<6;i++) intervals[i] = intv[i]; |
393 | 228 | filterInfo->filterOK = 1; | |
394 | 228 | filterInfo->filterQualityIndex = qIndex; | |
395 | 228 | filterInfo->numIter = numIter; | |
396 | 228 | filterInfo->yLimit = yLimit; | |
397 | 228 | filterInfo->ySummit = ySummit; | |
398 | 228 | filterInfo->numLeftSteps = numLeftSteps; | |
399 | 228 | filterInfo->yLeftSummit = yLeftSummit; | |
400 | 228 | filterInfo->yLeftBottom = yLeftBottom; | |
401 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
228 | if (filterInfo->filterType == 2) { |
402 | 228 | filterInfo->yLimitGap = yLimitGap; | |
403 | 228 | filterInfo->numRightSteps = numRightSteps; | |
404 | 228 | filterInfo->yRightSummit = yRightSummit; | |
405 | 228 | filterInfo->yRightBottom = yRightBottom; | |
406 | } | ||
407 | numMoreLooked = 0; | ||
408 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
312 | } else if (++numMoreLooked == numLookMore) { |
409 | /* filter has been optimal */ | ||
410 | 78 | filterInfo->filterOK = 2; | |
411 | 78 | break; | |
412 | } | ||
413 | /* try stepping further to see whether it can improve */ | ||
414 | 462 | stepLeft = PETSC_TRUE; | |
415 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
462 | if (filterInfo->filterType == 2) stepRight = PETSC_TRUE; |
416 | } | ||
417 | /* check whether we can really "step" */ | ||
418 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
712 | if (leftBoundaryMet) { |
419 |
2/4✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 10 times.
✗ Branch 3 not taken.
|
140 | if (filterInfo->filterType == 1 || rightBoundaryMet) break; /* cannot step further, so break the loop */ |
420 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
140 | if (stepLeft) { |
421 | /* cannot step left, so try stepping right */ | ||
422 | 140 | stepLeft = PETSC_FALSE; | |
423 | 140 | stepRight = PETSC_TRUE; | |
424 | } | ||
425 | } | ||
426 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
712 | if (rightBoundaryMet && stepRight) { |
427 | /* cannot step right, so try stepping left */ | ||
428 | stepRight = PETSC_FALSE; | ||
429 | stepLeft = PETSC_TRUE; | ||
430 | } | ||
431 | /* now "step" */ | ||
432 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
712 | if (stepLeft) { |
433 | 572 | numLeftSteps++; | |
434 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
572 | if (filterInfo->filterType == 2) leftDelta *= opts->shiftStepExpanRate; /* expand the step for faster convergence */ |
435 | 572 | z1 -= leftDelta; | |
436 | } | ||
437 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
712 | if (stepRight) { |
438 | 712 | numRightSteps++; | |
439 | 712 | rightDelta *= opts->shiftStepExpanRate; /* expand the step for faster convergence */ | |
440 | 712 | z2 += rightDelta; | |
441 | } | ||
442 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
712 | if (filterInfo->filterType == 2) { |
443 | /* shrink the "plateau" of the (dual) base filter */ | ||
444 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
712 | if (stepLeft && stepRight) halfPlateau /= opts->plateauShrinkRate; |
445 | 140 | else halfPlateau /= PetscSqrtReal(opts->plateauShrinkRate); | |
446 | } | ||
447 | } | ||
448 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
78 | PetscCheck(filterInfo->filterOK,PETSC_COMM_SELF,PETSC_ERR_CONV_FAILED,"STFILTER cannot get the filter specified; please adjust your filter parameters (e.g. increasing the polynomial degree)"); |
449 | |||
450 | 78 | filterInfo->totalNumIter = numIter; | |
451 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
78 | PetscCall(PetscFree2(polyFilter,baseFilter)); |
452 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
14 | PetscFunctionReturn(PETSC_SUCCESS); |
453 | } | ||
454 | |||
455 | /* //////////////////////////////////////////////////////////////////////////// | ||
456 | // Chebyshev Polynomials | ||
457 | //////////////////////////////////////////////////////////////////////////// */ | ||
458 | |||
459 | /* | ||
460 | FILTLAN function ExpandNewtonPolynomialInChebyshevBasis | ||
461 | |||
462 | translate the coefficients of a Newton polynomial to the coefficients | ||
463 | in a basis of the `translated' (scale-and-shift) Chebyshev polynomials | ||
464 | |||
465 | input: | ||
466 | a Newton polynomial defined by vectors a and x: | ||
467 | P(z) = a(1) + a(2)*(z-x(1)) + a(3)*(z-x(1))*(z-x(2)) + ... + a(n)*(z-x(1))*...*(z-x(n-1)) | ||
468 | the interval [aa,bb] defines the `translated' Chebyshev polynomials S_i(z) = T_i((z-c)/h), | ||
469 | where c=(aa+bb)/2 and h=(bb-aa)/2, and T_i is the Chebyshev polynomial of the first kind | ||
470 | note that T_i is defined by T_0(z)=1, T_1(z)=z, and T_i(z)=2*z*T_{i-1}(z)+T_{i-2}(z) for i>=2 | ||
471 | |||
472 | output: | ||
473 | a vector q containing the Chebyshev coefficients: | ||
474 | P(z) = q(1)*S_0(z) + q(2)*S_1(z) + ... + q(n)*S_{n-1}(z) | ||
475 | */ | ||
476 | 19496 | static inline PetscErrorCode FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(PetscInt n,PetscReal aa,PetscReal bb,PetscReal *a,PetscReal *x,PetscReal *q,PetscReal *q2) | |
477 | { | ||
478 | 19496 | PetscInt m,mm; | |
479 | 19496 | PetscReal *sa,*sx,*sq,*sq2,c,c2,h,h2; | |
480 | |||
481 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
19496 | PetscFunctionBegin; |
482 | 19496 | sa = a+n; /* pointers for traversing a and x */ | |
483 | 19496 | sx = x+n-1; | |
484 | 19496 | *q = *--sa; /* set q[0] = a(n) */ | |
485 | |||
486 | 19496 | c = (aa+bb)/2.0; | |
487 | 19496 | h = (bb-aa)/2.0; | |
488 | 19496 | h2 = h/2.0; | |
489 | |||
490 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
428912 | for (m=1;m<=n-1;m++) { /* the main loop for translation */ |
491 | |||
492 | /* compute q2[0:m-1] = (c-x[n-m-1])*q[0:m-1] */ | ||
493 | 409416 | mm = m; | |
494 | 409416 | sq = q; | |
495 | 409416 | sq2 = q2; | |
496 | 409416 | c2 = c-(*--sx); | |
497 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
4912992 | while (mm--) *(sq2++) = c2*(*sq++); |
498 | 409416 | *sq2 = 0.0; /* q2[m] = 0.0 */ | |
499 | 409416 | *(q2+1) += h*(*q); /* q2[1] = q2[1] + h*q[0] */ | |
500 | |||
501 | /* compute q2[0:m-2] = q2[0:m-2] + h2*q[1:m-1] */ | ||
502 | 409416 | mm = m-1; | |
503 | 409416 | sq2 = q2; | |
504 | 409416 | sq = q+1; | |
505 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
4503576 | while (mm--) *(sq2++) += h2*(*sq++); |
506 | |||
507 | /* compute q2[2:m] = q2[2:m] + h2*q[1:m-1] */ | ||
508 | 409416 | mm = m-1; | |
509 | 409416 | sq2 = q2+2; | |
510 | 409416 | sq = q+1; | |
511 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
4503576 | while (mm--) *(sq2++) += h2*(*sq++); |
512 | |||
513 | /* compute q[0:m] = q2[0:m] */ | ||
514 | 409416 | mm = m+1; | |
515 | 409416 | sq2 = q2; | |
516 | 409416 | sq = q; | |
517 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
5322408 | while (mm--) *sq++ = *sq2++; |
518 | 409416 | *q += (*--sa); /* q[0] = q[0] + p[n-m-1] */ | |
519 | } | ||
520 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
19496 | PetscFunctionReturn(PETSC_SUCCESS); |
521 | } | ||
522 | |||
523 | /* | ||
524 | FILTLAN function PolynomialEvaluationInChebyshevBasis | ||
525 | |||
526 | evaluate P(z) at z=z0, where P(z) is a polynomial expanded in a basis of | ||
527 | the `translated' (i.e. scale-and-shift) Chebyshev polynomials | ||
528 | |||
529 | input: | ||
530 | c is a vector of Chebyshev coefficients which defines the polynomial | ||
531 | P(z) = c(1)*S_0(z) + c(2)*S_1(z) + ... + c(n)*S_{n-1}(z), | ||
532 | where S_i is the `translated' Chebyshev polynomial S_i((z-c)/h) = T_i(z), with | ||
533 | c = (intv(j)+intv(j+1)) / 2, h = (intv(j+1)-intv(j)) / 2 | ||
534 | note that T_i(z) is the Chebyshev polynomial of the first kind, | ||
535 | T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2 | ||
536 | |||
537 | output: | ||
538 | the evaluated value of P(z) at z=z0 | ||
539 | */ | ||
540 | 3105600 | static inline PetscReal FILTLAN_PolynomialEvaluationInChebyshevBasis(PetscReal *pp,PetscInt m,PetscInt idx,PetscReal z0,PetscReal aa,PetscReal bb) | |
541 | { | ||
542 | 3105600 | PetscInt ii,deg1; | |
543 | 3105600 | PetscReal y,zz,t0,t1,t2,*sc; | |
544 | |||
545 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
3105600 | PetscFunctionBegin; |
546 | 3105600 | deg1 = m; | |
547 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
3105600 | if (aa==-1.0 && bb==1.0) zz = z0; /* treat it as a special case to reduce rounding errors */ |
548 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
3105600 | else zz = (aa==bb)? 0.0 : -1.0+2.0*(z0-aa)/(bb-aa); |
549 | |||
550 | /* compute y = P(z0), where we utilize the Chebyshev recursion */ | ||
551 | 3105600 | sc = pp+(idx-1)*m; /* sc points to column idx of pp */ | |
552 | 3105600 | y = *sc++; | |
553 | 3105600 | t1 = 1.0; t2 = zz; | |
554 | 3105600 | ii = deg1-1; | |
555 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
616263700 | while (ii--) { |
556 | /* Chebyshev recursion: T_0(zz)=1, T_1(zz)=zz, and T_{i+1}(zz) = 2*zz*T_i(zz) + T_{i-1}(zz) for i>=2 | ||
557 | the values of T_{i+1}(zz), T_i(zz), T_{i-1}(zz) are stored in t0, t1, t2, respectively */ | ||
558 | 613158100 | t0 = 2*zz*t1 - t2; | |
559 | /* it also works for the base case / the first iteration, where t0 equals 2*zz*1-zz == zz which is T_1(zz) */ | ||
560 | 613158100 | t2 = t1; | |
561 | 613158100 | t1 = t0; | |
562 | 613158100 | y += (*sc++)*t0; | |
563 | } | ||
564 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
3105600 | PetscFunctionReturn(y); |
565 | } | ||
566 | |||
567 | #define basisTranslated PETSC_TRUE | ||
568 | /* | ||
569 | FILTLAN function PiecewisePolynomialEvaluationInChebyshevBasis | ||
570 | |||
571 | evaluate P(z) at z=z0, where P(z) is a piecewise polynomial expanded | ||
572 | in a basis of the (optionally translated, i.e. scale-and-shift) Chebyshev polynomials for each interval | ||
573 | |||
574 | input: | ||
575 | intv is a vector which defines the intervals; the j-th interval is [intv(j), intv(j+1)) | ||
576 | pp is a matrix of Chebyshev coefficients which defines a piecewise polynomial P(z) | ||
577 | in a basis of the `translated' Chebyshev polynomials in each interval | ||
578 | the polynomial P_j(z) in the j-th interval, i.e. when z is in [intv(j), intv(j+1)), is defined by the j-th column of pp: | ||
579 | if basisTranslated == false, then | ||
580 | P_j(z) = pp(1,j)*T_0(z) + pp(2,j)*T_1(z) + ... + pp(n,j)*T_{n-1}(z), | ||
581 | where T_i(z) is the Chebyshev polynomial of the first kind, | ||
582 | T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2 | ||
583 | if basisTranslated == true, then | ||
584 | P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z), | ||
585 | where S_i is the `translated' Chebyshev polynomial S_i((z-c)/h) = T_i(z), with | ||
586 | c = (intv(j)+intv(j+1)) / 2, h = (intv(j+1)-intv(j)) / 2 | ||
587 | |||
588 | output: | ||
589 | the evaluated value of P(z) at z=z0 | ||
590 | |||
591 | note that if z0 falls below the first interval, then the polynomial in the first interval will be used to evaluate P(z0) | ||
592 | if z0 flies over the last interval, then the polynomial in the last interval will be used to evaluate P(z0) | ||
593 | */ | ||
594 | 3105600 | static inline PetscReal FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(PetscReal *pp,PetscInt m,PetscReal *intv,PetscInt npoints,PetscReal z0) | |
595 | { | ||
596 | 3105600 | PetscReal *sintv,aa,bb,resul; | |
597 | 3105600 | PetscInt idx; | |
598 | |||
599 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
3105600 | PetscFunctionBegin; |
600 | /* determine the interval which contains z0 */ | ||
601 | 3105600 | sintv = &intv[1]; | |
602 | 3105600 | idx = 1; | |
603 |
3/4✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
|
3105600 | if (npoints>2 && z0 >= *sintv) { |
604 | 1414756 | sintv++; | |
605 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
5525928 | while (++idx < npoints-1) { |
606 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
4178064 | if (z0 < *sintv) break; |
607 | 4111172 | sintv++; | |
608 | } | ||
609 | } | ||
610 | /* idx==1 if npoints<=2; otherwise idx satisfies: | ||
611 | intv(idx) <= z0 < intv(idx+1), if 2 <= idx <= npoints-2 | ||
612 | z0 < intv(idx+1), if idx == 1 | ||
613 | intv(idx) <= z0, if idx == npoints-1 | ||
614 | in addition, sintv points to &intv(idx+1) */ | ||
615 | 3105600 | if (basisTranslated) { | |
616 | /* the basis consists of `translated' Chebyshev polynomials */ | ||
617 | /* find the interval of concern, [aa,bb] */ | ||
618 | 3105600 | aa = *(sintv-1); | |
619 | 3105600 | bb = *sintv; | |
620 | 3105600 | resul = FILTLAN_PolynomialEvaluationInChebyshevBasis(pp,m,idx,z0,aa,bb); | |
621 | } else { | ||
622 | /* the basis consists of standard Chebyshev polynomials, with interval [-1.0,1.0] for integration */ | ||
623 | resul = FILTLAN_PolynomialEvaluationInChebyshevBasis(pp,m,idx,z0,-1.0,1.0); | ||
624 | } | ||
625 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
3105600 | PetscFunctionReturn(resul); |
626 | } | ||
627 | |||
628 | /* | ||
629 | FILTLAN function PiecewisePolynomialInnerProductInChebyshevBasis | ||
630 | |||
631 | compute the weighted inner product of two piecewise polynomials expanded | ||
632 | in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials for each interval | ||
633 | |||
634 | pp and qq are two matrices of Chebyshev coefficients which define the piecewise polynomials P(z) and Q(z), respectively | ||
635 | for z in the j-th interval, P(z) equals | ||
636 | P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z), | ||
637 | and Q(z) equals | ||
638 | Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(n,j)*S_{n-1}(z), | ||
639 | where S_i(z) is the `translated' Chebyshev polynomial in that interval, | ||
640 | S_i((z-c)/h) = T_i(z), c = (aa+bb)) / 2, h = (bb-aa) / 2, | ||
641 | with T_i(z) the Chebyshev polynomial of the first kind | ||
642 | T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2 | ||
643 | |||
644 | the (scaled) j-th interval inner product is defined by | ||
645 | <P_j,Q_j> = (Pi/2)*(pp(1,j)*qq(1,j) + sum_{k} pp(k,j)*qq(k,j)), | ||
646 | which comes from the property | ||
647 | <T_0,T_0>=pi, <T_i,T_i>=pi/2 for i>=1, and <T_i,T_j>=0 for i!=j | ||
648 | |||
649 | the weighted inner product is <P,Q> = sum_{j} intervalWeights(j)*<P_j,Q_j>, | ||
650 | which is the return value | ||
651 | |||
652 | note that for unit weights, pass an empty vector of intervalWeights (i.e. of length 0) | ||
653 | */ | ||
654 | 6391605 | static inline PetscReal FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(PetscReal *pp,PetscInt prows,PetscInt pcols,PetscInt ldp,PetscReal *qq,PetscInt qrows,PetscInt qcols,PetscInt ldq,const PetscReal *intervalWeights) | |
655 | { | ||
656 | 6391605 | PetscInt nintv,deg1,i,k; | |
657 | 6391605 | PetscReal *sp,*sq,ans=0.0,ans2; | |
658 | |||
659 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
6391605 | PetscFunctionBegin; |
660 | 6391605 | deg1 = PetscMin(prows,qrows); /* number of effective coefficients, one more than the effective polynomial degree */ | |
661 |
2/14✓ Branch 0 taken 8 times.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
|
6391605 | if (PetscUnlikely(!deg1)) PetscFunctionReturn(0.0); |
662 | 6391605 | nintv = PetscMin(pcols,qcols); /* number of intervals */ | |
663 | |||
664 | /* scaled by intervalWeights(i) in the i-th interval (we assume intervalWeights[] are always provided). | ||
665 | compute ans = sum_{i=1,...,nintv} intervalWeights(i)*[ pp(1,i)*qq(1,i) + sum_{k=1,...,deg} pp(k,i)*qq(k,i) ] */ | ||
666 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
38349630 | for (i=0;i<nintv;i++) { /* compute ans2 = pp(1,i)*qq(1,i) + sum_{k=1,...,deg} pp(k,i)*qq(k,i) */ |
667 | 31958025 | sp = pp+i*ldp; | |
668 | 31958025 | sq = qq+i*ldq; | |
669 | 31958025 | ans2 = (*sp) * (*sq); /* the first term pp(1,i)*qq(1,i) is being added twice */ | |
670 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
1889817825 | for (k=0;k<deg1;k++) ans2 += (*sp++)*(*sq++); /* add pp(k,i)*qq(k,i) */ |
671 | 31958025 | ans += ans2*intervalWeights[i]; /* compute ans += ans2*intervalWeights(i) */ | |
672 | } | ||
673 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
6391605 | PetscFunctionReturn(ans*PETSC_PI/2.0); |
674 | } | ||
675 | |||
676 | /* | ||
677 | FILTLAN function PiecewisePolynomialInChebyshevBasisMultiplyX | ||
678 | |||
679 | compute Q(z) = z*P(z), where P(z) and Q(z) are piecewise polynomials expanded | ||
680 | in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials for each interval | ||
681 | |||
682 | P(z) and Q(z) are stored as matrices of Chebyshev coefficients pp and qq, respectively | ||
683 | |||
684 | For z in the j-th interval, P(z) equals | ||
685 | P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z), | ||
686 | and Q(z) equals | ||
687 | Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(n,j)*S_{n-1}(z), | ||
688 | where S_i(z) is the `translated' Chebyshev polynomial in that interval, | ||
689 | S_i((z-c)/h) = T_i(z), c = (intv(j)+intv(j+1))) / 2, h = (intv(j+1)-intv(j)) / 2, | ||
690 | with T_i(z) the Chebyshev polynomial of the first kind | ||
691 | T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2 | ||
692 | |||
693 | the returned matrix is qq which represents Q(z) = z*P(z) | ||
694 | */ | ||
695 | 2134605 | static inline PetscErrorCode FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(PetscReal *pp,PetscInt deg1,PetscInt ldp,PetscReal *intv,PetscInt nintv,PetscReal *qq,PetscInt ldq) | |
696 | { | ||
697 | 2134605 | PetscInt i,j; | |
698 | 2134605 | PetscReal c,h,h2,tmp,*sp,*sq,*sp2,*sq2; | |
699 | |||
700 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
2134605 | PetscFunctionBegin; |
701 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
12807630 | for (j=0;j<nintv;j++) { /* consider interval between intv(j) and intv(j+1) */ |
702 | 10673025 | sp = pp+j*ldp; | |
703 | 10673025 | sq = qq+j*ldq; | |
704 | 10673025 | sp2 = sp; | |
705 | 10673025 | sq2 = sq+1; | |
706 | 10673025 | c = 0.5*(intv[j] + intv[j+1]); /* compute c = (intv(j) + intv(j+1))/2 */ | |
707 | 10673025 | h = 0.5*(intv[j+1] - intv[j]); /* compute h = (intv(j+1) - intv(j))/2 and h2 = h/2 */ | |
708 | 10673025 | h2 = 0.5*h; | |
709 | 10673025 | i = deg1; | |
710 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
826715325 | while (i--) *sq++ = c*(*sp++); /* compute q(1:deg1,j) = c*p(1:deg1,j) */ |
711 | 10673025 | *sq++ = 0.0; /* set q(deg1+1,j) = 0.0 */ | |
712 | 10673025 | *(sq2++) += h*(*sp2++); /* compute q(2,j) = q(2,j) + h*p(1,j) */ | |
713 | 10673025 | i = deg1-1; | |
714 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
816042300 | while (i--) { /* compute q(3:deg1+1,j) += h2*p(2:deg1,j) and then q(1:deg1-1,j) += h2*p(2:deg1,j) */ |
715 | 805369275 | tmp = h2*(*sp2++); | |
716 | 805369275 | *(sq2-2) += tmp; | |
717 | 805369275 | *(sq2++) += tmp; | |
718 | } | ||
719 | } | ||
720 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
2134605 | PetscFunctionReturn(PETSC_SUCCESS); |
721 | } | ||
722 | |||
723 | /* //////////////////////////////////////////////////////////////////////////// | ||
724 | // Conjugate Residual Method in the Polynomial Space | ||
725 | //////////////////////////////////////////////////////////////////////////// */ | ||
726 | |||
727 | /* | ||
728 | B := alpha*A + beta*B | ||
729 | |||
730 | A,B are nxk | ||
731 | */ | ||
732 | 8494660 | static inline PetscErrorCode Mat_AXPY_BLAS(PetscInt n,PetscInt k,PetscReal alpha,const PetscReal *A,PetscInt lda,PetscReal beta,PetscReal *B,PetscInt ldb) | |
733 | { | ||
734 | 8494660 | PetscInt i,j; | |
735 | |||
736 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
8494660 | PetscFunctionBegin; |
737 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
8494660 | if (beta==(PetscReal)1.0) { |
738 |
4/4✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
|
1670879500 | for (j=0;j<k;j++) for (i=0;i<n;i++) B[i+j*ldb] += alpha*A[i+j*lda]; |
739 |
3/6✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
4257000 | PetscCall(PetscLogFlops(2.0*n*k)); |
740 | } else { | ||
741 |
4/4✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
|
1667789210 | for (j=0;j<k;j++) for (i=0;i<n;i++) B[i+j*ldb] = alpha*A[i+j*lda] + beta*B[i+j*ldb]; |
742 |
3/6✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
4237660 | PetscCall(PetscLogFlops(3.0*n*k)); |
743 | } | ||
744 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
8494660 | PetscFunctionReturn(PETSC_SUCCESS); |
745 | } | ||
746 | |||
747 | /* | ||
748 | FILTLAN function FilteredConjugateResidualPolynomial | ||
749 | |||
750 | ** Conjugate Residual Method in the Polynomial Space | ||
751 | |||
752 | this routine employs a conjugate-residual-type algorithm in polynomial space to minimize ||P(z)-Q(z)||_w, | ||
753 | where P(z), the base filter, is the input piecewise polynomial, and | ||
754 | Q(z) is the output polynomial satisfying Q(0)==1, i.e. the constant term of Q(z) is 1 | ||
755 | niter is the number of conjugate-residual iterations; therefore, the degree of Q(z) is up to niter+1 | ||
756 | both P(z) and Q(z) are expanded in the `translated' (scale-and-shift) Chebyshev basis for each interval, | ||
757 | and presented as matrices of Chebyshev coefficients, denoted by pp and qq, respectively | ||
758 | |||
759 | input: | ||
760 | intv is a vector which defines the intervals; the j-th interval is [intv(j),intv(j+1)) | ||
761 | w is a vector of Chebyshev weights; the weight of j-th interval is w(j) | ||
762 | the interval weights define the inner product of two continuous functions and then | ||
763 | the derived w-norm ||P(z)-Q(z)||_w | ||
764 | pp is a matrix of Chebyshev coefficients which defines the piecewise polynomial P(z) | ||
765 | to be specific, for z in [intv(j), intv(j+1)), P(z) equals | ||
766 | P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(niter+2,j)*S_{niter+1}(z), | ||
767 | where S_i(z) is the `translated' Chebyshev polynomial in that interval, | ||
768 | S_i((z-c)/h) = T_i(z), c = (intv(j)+intv(j+1))) / 2, h = (intv(j+1)-intv(j)) / 2, | ||
769 | with T_i(z) the Chebyshev polynomial of the first kind, | ||
770 | T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2 | ||
771 | |||
772 | output: | ||
773 | the return matrix, denoted by qq, represents a polynomial Q(z) with degree up to 1+niter | ||
774 | and satisfying Q(0)==1, such that ||P(z))-Q(z)||_w is minimized | ||
775 | this polynomial Q(z) is expanded in the `translated' Chebyshev basis for each interval | ||
776 | to be precise, considering z in [intv(j), intv(j+1)), Q(z) equals | ||
777 | Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(niter+2,j)*S_{niter+1}(z) | ||
778 | |||
779 | note: | ||
780 | 1. since Q(0)==1, P(0)==1 is expected; if P(0)!=1, one can translate P(z) | ||
781 | for example, if P(0)==0, one can use 1-P(z) as input instead of P(z) | ||
782 | 2. typically, the base filter, defined by pp and intv, is from Hermite interpolation | ||
783 | in intervals [intv(j),intv(j+1)) for j=1,...,nintv, with nintv the number of intervals | ||
784 | */ | ||
785 | 9670 | static PetscErrorCode FILTLAN_FilteredConjugateResidualPolynomial(PetscReal *cpol,PetscReal *baseFilter,PetscInt nbase,PetscReal *intv,PetscInt m,PetscReal *intervalWeights,PetscInt niter) | |
786 | { | ||
787 | 9670 | PetscInt i,j,srpol,scpol,sarpol,sppol,sappol,ld,nintv; | |
788 | 9670 | PetscReal rho,rho0,rho1,den,bet,alp,alp0,*ppol,*rpol,*appol,*arpol; | |
789 | |||
790 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
9670 | PetscFunctionBegin; |
791 | 9670 | nintv = m-1; | |
792 | 9670 | ld = niter+2; /* leading dimension */ | |
793 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
9670 | PetscCall(PetscCalloc4(ld*nintv,&ppol,ld*nintv,&rpol,ld*nintv,&appol,ld*nintv,&arpol)); |
794 |
4/6✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
9670 | PetscCall(PetscArrayzero(cpol,ld*nintv)); |
795 | /* initialize polynomial ppol to be 1 (i.e. multiplicative identity) in all intervals */ | ||
796 | 58020 | sppol = 2; | |
797 | 58020 | srpol = 2; | |
798 | 58020 | scpol = 2; | |
799 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
58020 | for (j=0;j<nintv;j++) { |
800 | 48350 | ppol[j*ld] = 1.0; | |
801 | 48350 | rpol[j*ld] = 1.0; | |
802 | 48350 | cpol[j*ld] = 1.0; | |
803 | } | ||
804 | /* ppol is the initial p-polynomial (corresponding to the A-conjugate vector p in CG) | ||
805 | rpol is the r-polynomial (corresponding to the residual vector r in CG) | ||
806 | cpol is the "corrected" residual polynomial (result of this function) */ | ||
807 | 9670 | sappol = 3; | |
808 | 9670 | sarpol = 3; | |
809 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
9670 | PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(ppol,sppol,ld,intv,nintv,appol,ld)); |
810 |
4/4✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
|
183730 | for (i=0;i<3;i++) for (j=0;j<nintv;j++) arpol[i+j*ld] = appol[i+j*ld]; |
811 | 9670 | rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights); | |
812 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
1339258 | for (i=0;i<niter;i++) { |
813 | 1337500 | den = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(appol,sappol,nintv,ld,appol,sappol,nintv,ld,intervalWeights); | |
814 | 1337500 | alp0 = rho/den; | |
815 | 1337500 | rho1 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(baseFilter,nbase,nintv,nbase,appol,sappol,nintv,ld,intervalWeights); | |
816 | 1337500 | alp = (rho-rho1)/den; | |
817 | 1337500 | srpol++; | |
818 | 1337500 | scpol++; | |
819 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
1337500 | PetscCall(Mat_AXPY_BLAS(srpol,nintv,-alp0,appol,ld,1.0,rpol,ld)); |
820 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
1337500 | PetscCall(Mat_AXPY_BLAS(scpol,nintv,-alp,appol,ld,1.0,cpol,ld)); |
821 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
1337500 | if (i+1 == niter) break; |
822 | 1327830 | sarpol++; | |
823 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
1327830 | PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(rpol,srpol,ld,intv,nintv,arpol,ld)); |
824 | 1327830 | rho0 = rho; | |
825 | 1327830 | rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights); | |
826 | 1327830 | bet = rho / rho0; | |
827 | 1327830 | sppol++; | |
828 | 1327830 | sappol++; | |
829 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
1327830 | PetscCall(Mat_AXPY_BLAS(sppol,nintv,1.0,rpol,ld,bet,ppol,ld)); |
830 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
1327830 | PetscCall(Mat_AXPY_BLAS(sappol,nintv,1.0,arpol,ld,bet,appol,ld)); |
831 | } | ||
832 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
9670 | PetscCall(PetscFree4(ppol,rpol,appol,arpol)); |
833 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
1758 | PetscFunctionReturn(PETSC_SUCCESS); |
834 | } | ||
835 | |||
836 | /* | ||
837 | FILTLAN function FilteredConjugateResidualMatrixPolynomialVectorProduct | ||
838 | |||
839 | this routine employs a conjugate-residual-type algorithm in polynomial space to compute | ||
840 | x = x0 + s(A)*r0 with r0 = b - A*x0, such that ||(1-P(z))-z*s(z)||_w is minimized, where | ||
841 | P(z) is a given piecewise polynomial, called the base filter, | ||
842 | s(z) is a polynomial of degree up to niter, the number of conjugate-residual iterations, | ||
843 | and b and x0 are given vectors | ||
844 | |||
845 | note that P(z) is expanded in the `translated' (scale-and-shift) Chebyshev basis for each interval, | ||
846 | and presented as a matrix of Chebyshev coefficients pp | ||
847 | |||
848 | input: | ||
849 | A is a sparse matrix | ||
850 | x0, b are vectors | ||
851 | niter is the number of conjugate-residual iterations | ||
852 | intv is a vector which defines the intervals; the j-th interval is [intv(j),intv(j+1)) | ||
853 | w is a vector of Chebyshev weights; the weight of j-th interval is w(j) | ||
854 | the interval weights define the inner product of two continuous functions and then | ||
855 | the derived w-norm ||P(z)-Q(z)||_w | ||
856 | pp is a matrix of Chebyshev coefficients which defines the piecewise polynomial P(z) | ||
857 | to be specific, for z in [intv(j), intv(j+1)), P(z) equals | ||
858 | P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(niter+2,j)*S_{niter+1}(z), | ||
859 | where S_i(z) is the `translated' Chebyshev polynomial in that interval, | ||
860 | S_i((z-c)/h) = T_i(z), c = (intv(j)+intv(j+1))) / 2, h = (intv(j+1)-intv(j)) / 2, | ||
861 | with T_i(z) the Chebyshev polynomial of the first kind, | ||
862 | T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2 | ||
863 | tol is the tolerance; if the residual polynomial in z-norm is dropped by a factor lower | ||
864 | than tol, then stop the conjugate-residual iteration | ||
865 | |||
866 | output: | ||
867 | the return vector is x = x0 + s(A)*r0 with r0 = b - A*x0, such that ||(1-P(z))-z*s(z)||_w is minimized, | ||
868 | subject to that s(z) is a polynomial of degree up to niter, where P(z) is the base filter | ||
869 | in short, z*s(z) approximates 1-P(z) | ||
870 | |||
871 | note: | ||
872 | 1. since z*s(z) approximates 1-P(z), P(0)==1 is expected; if P(0)!=1, one can translate P(z) | ||
873 | for example, if P(0)==0, one can use 1-P(z) as input instead of P(z) | ||
874 | 2. typically, the base filter, defined by pp and intv, is from Hermite interpolation | ||
875 | in intervals [intv(j),intv(j+1)) for j=1,...,nintv, with nintv the number of intervals | ||
876 | 3. a common application is to compute R(A)*b, where R(z) approximates 1-P(z) | ||
877 | in this case, one can set x0 = 0 and then the return vector is x = s(A)*b, where | ||
878 | z*s(z) approximates 1-P(z); therefore, A*x is the wanted R(A)*b | ||
879 | */ | ||
880 | 5748 | static PetscErrorCode FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProduct(Mat A,Vec b,Vec x,PetscReal *baseFilter,PetscInt nbase,PetscReal *intv,PetscInt nintv,PetscReal *intervalWeights,PetscInt niter,Vec *work) | |
881 | { | ||
882 | 5748 | PetscInt i,j,srpol,scpol,sarpol,sppol,sappol,ld; | |
883 | 5748 | PetscReal rho,rho0,rho00,rho1,den,bet,alp,alp0,*cpol,*ppol,*rpol,*appol,*arpol,tol=0.0; | |
884 | 5748 | Vec r=work[0],p=work[1],ap=work[2],w=work[3]; | |
885 | 5748 | PetscScalar alpha; | |
886 | |||
887 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
5748 | PetscFunctionBegin; |
888 | 5748 | ld = niter+3; /* leading dimension */ | |
889 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
5748 | PetscCall(PetscCalloc5(ld*nintv,&ppol,ld*nintv,&rpol,ld*nintv,&cpol,ld*nintv,&appol,ld*nintv,&arpol)); |
890 | /* initialize polynomial ppol to be 1 (i.e. multiplicative identity) in all intervals */ | ||
891 | 34488 | sppol = 2; | |
892 | 34488 | srpol = 2; | |
893 | 34488 | scpol = 2; | |
894 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
34488 | for (j=0;j<nintv;j++) { |
895 | 28740 | ppol[j*ld] = 1.0; | |
896 | 28740 | rpol[j*ld] = 1.0; | |
897 | 28740 | cpol[j*ld] = 1.0; | |
898 | } | ||
899 | /* ppol is the initial p-polynomial (corresponding to the A-conjugate vector p in CG) | ||
900 | rpol is the r-polynomial (corresponding to the residual vector r in CG) | ||
901 | cpol is the "corrected" residual polynomial */ | ||
902 | 5748 | sappol = 3; | |
903 | 5748 | sarpol = 3; | |
904 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
5748 | PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(ppol,sppol,ld,intv,nintv,appol,ld)); |
905 |
4/4✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
|
109212 | for (i=0;i<3;i++) for (j=0;j<nintv;j++) arpol[i+j*ld] = appol[i+j*ld]; |
906 | 5748 | rho00 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights); | |
907 | 5748 | rho = rho00; | |
908 | |||
909 | /* corrected CR in vector space */ | ||
910 | /* we assume x0 is always 0 */ | ||
911 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
5748 | PetscCall(VecSet(x,0.0)); |
912 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
5748 | PetscCall(VecCopy(b,r)); /* initial residual r = b-A*x0 */ |
913 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
5748 | PetscCall(VecCopy(r,p)); /* p = r */ |
914 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
5748 | PetscCall(MatMult(A,p,ap)); /* ap = A*p */ |
915 | |||
916 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
725348 | for (i=0;i<niter;i++) { |
917 | /* iteration in the polynomial space */ | ||
918 | 719600 | den = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(appol,sappol,nintv,ld,appol,sappol,nintv,ld,intervalWeights); | |
919 | 719600 | alp0 = rho/den; | |
920 | 719600 | rho1 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(baseFilter,nbase,nintv,nbase,appol,sappol,nintv,ld,intervalWeights); | |
921 | 719600 | alp = (rho-rho1)/den; | |
922 | 719600 | srpol++; | |
923 | 719600 | scpol++; | |
924 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
719600 | PetscCall(Mat_AXPY_BLAS(srpol,nintv,-alp0,appol,ld,1.0,rpol,ld)); |
925 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
719600 | PetscCall(Mat_AXPY_BLAS(scpol,nintv,-alp,appol,ld,1.0,cpol,ld)); |
926 | 719600 | sarpol++; | |
927 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
719600 | PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(rpol,srpol,ld,intv,nintv,arpol,ld)); |
928 | 719600 | rho0 = rho; | |
929 | 719600 | rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights); | |
930 | |||
931 | /* update x in the vector space */ | ||
932 | 719600 | alpha = alp; | |
933 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
719600 | PetscCall(VecAXPY(x,alpha,p)); /* x += alp*p */ |
934 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
719600 | if (rho < tol*rho00) break; |
935 | |||
936 | /* finish the iteration in the polynomial space */ | ||
937 | 719600 | bet = rho / rho0; | |
938 | 719600 | sppol++; | |
939 | 719600 | sappol++; | |
940 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
719600 | PetscCall(Mat_AXPY_BLAS(sppol,nintv,1.0,rpol,ld,bet,ppol,ld)); |
941 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
719600 | PetscCall(Mat_AXPY_BLAS(sappol,nintv,1.0,arpol,ld,bet,appol,ld)); |
942 | |||
943 | /* finish the iteration in the vector space */ | ||
944 | 719600 | alpha = -alp0; | |
945 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
719600 | PetscCall(VecAXPY(r,alpha,ap)); /* r -= alp0*ap */ |
946 | 719600 | alpha = bet; | |
947 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
719600 | PetscCall(VecAYPX(p,alpha,r)); /* p = r + bet*p */ |
948 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
719600 | PetscCall(MatMult(A,r,w)); /* ap = A*r + bet*ap */ |
949 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
719600 | PetscCall(VecAYPX(ap,alpha,w)); |
950 | } | ||
951 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
5748 | PetscCall(PetscFree5(ppol,rpol,cpol,appol,arpol)); |
952 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
1084 | PetscFunctionReturn(PETSC_SUCCESS); |
953 | } | ||
954 | |||
955 | /* | ||
956 | Gateway to FILTLAN for evaluating y=p(A)*x | ||
957 | */ | ||
958 | 5748 | static PetscErrorCode MatMult_FILTLAN(Mat A,Vec x,Vec y) | |
959 | { | ||
960 | 5748 | ST st; | |
961 | 5748 | ST_FILTER *ctx; | |
962 | 5748 | FILTLAN_CTX filtlan; | |
963 | 5748 | PetscInt npoints; | |
964 | |||
965 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
5748 | PetscFunctionBegin; |
966 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
5748 | PetscCall(MatShellGetContext(A,&st)); |
967 | 5748 | ctx = (ST_FILTER*)st->data; | |
968 | 5748 | filtlan = (FILTLAN_CTX)ctx->data; | |
969 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
|
5748 | npoints = (filtlan->filterInfo->filterType == 2)? 6: 4; |
970 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
5748 | PetscCall(FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProduct(ctx->T,x,y,filtlan->baseFilter,2*filtlan->baseDegree+2,filtlan->intervals,npoints-1,filtlan->opts->intervalWeights,ctx->polyDegree,st->work)); |
971 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
5748 | PetscCall(VecCopy(y,st->work[0])); |
972 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
5748 | PetscCall(MatMult(ctx->T,st->work[0],y)); |
973 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
1084 | PetscFunctionReturn(PETSC_SUCCESS); |
974 | } | ||
975 | |||
976 | /* Block version of FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProduct */ | ||
977 | 357 | static PetscErrorCode FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProductBlock(Mat A,Mat B,Mat C,PetscReal *baseFilter,PetscInt nbase,PetscReal *intv,PetscInt nintv,PetscReal *intervalWeights,PetscInt niter,Vec *work,Mat R,Mat P,Mat AP,Mat W) | |
978 | { | ||
979 | 357 | PetscInt i,j,srpol,scpol,sarpol,sppol,sappol,ld; | |
980 | 357 | PetscReal rho,rho0,rho00,rho1,den,bet,alp,alp0,*cpol,*ppol,*rpol,*appol,*arpol,tol=0.0; | |
981 | 357 | PetscScalar alpha; | |
982 | |||
983 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
357 | PetscFunctionBegin; |
984 | 357 | ld = niter+3; /* leading dimension */ | |
985 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
357 | PetscCall(PetscCalloc5(ld*nintv,&ppol,ld*nintv,&rpol,ld*nintv,&cpol,ld*nintv,&appol,ld*nintv,&arpol)); |
986 | /* initialize polynomial ppol to be 1 (i.e. multiplicative identity) in all intervals */ | ||
987 | 2142 | sppol = 2; | |
988 | 2142 | srpol = 2; | |
989 | 2142 | scpol = 2; | |
990 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
2142 | for (j=0;j<nintv;j++) { |
991 | 1785 | ppol[j*ld] = 1.0; | |
992 | 1785 | rpol[j*ld] = 1.0; | |
993 | 1785 | cpol[j*ld] = 1.0; | |
994 | } | ||
995 | /* ppol is the initial p-polynomial (corresponding to the A-conjugate vector p in CG) | ||
996 | rpol is the r-polynomial (corresponding to the residual vector r in CG) | ||
997 | cpol is the "corrected" residual polynomial */ | ||
998 | 357 | sappol = 3; | |
999 | 357 | sarpol = 3; | |
1000 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
357 | PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(ppol,sppol,ld,intv,nintv,appol,ld)); |
1001 |
4/4✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
|
6783 | for (i=0;i<3;i++) for (j=0;j<nintv;j++) arpol[i+j*ld] = appol[i+j*ld]; |
1002 | 357 | rho00 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights); | |
1003 | 357 | rho = rho00; | |
1004 | |||
1005 | /* corrected CR in vector space */ | ||
1006 | /* we assume x0 is always 0 */ | ||
1007 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
357 | PetscCall(MatZeroEntries(C)); |
1008 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
357 | PetscCall(MatCopy(B,R,SAME_NONZERO_PATTERN)); /* initial residual r = b-A*x0 */ |
1009 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
357 | PetscCall(MatCopy(R,P,SAME_NONZERO_PATTERN)); /* p = r */ |
1010 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
357 | PetscCall(MatMatMult(A,P,MAT_REUSE_MATRIX,PETSC_DEFAULT,&AP)); /* ap = A*p */ |
1011 | |||
1012 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
71757 | for (i=0;i<niter;i++) { |
1013 | /* iteration in the polynomial space */ | ||
1014 | 71400 | den = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(appol,sappol,nintv,ld,appol,sappol,nintv,ld,intervalWeights); | |
1015 | 71400 | alp0 = rho/den; | |
1016 | 71400 | rho1 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(baseFilter,nbase,nintv,nbase,appol,sappol,nintv,ld,intervalWeights); | |
1017 | 71400 | alp = (rho-rho1)/den; | |
1018 | 71400 | srpol++; | |
1019 | 71400 | scpol++; | |
1020 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
71400 | PetscCall(Mat_AXPY_BLAS(srpol,nintv,-alp0,appol,ld,1.0,rpol,ld)); |
1021 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
71400 | PetscCall(Mat_AXPY_BLAS(scpol,nintv,-alp,appol,ld,1.0,cpol,ld)); |
1022 | 71400 | sarpol++; | |
1023 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
71400 | PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(rpol,srpol,ld,intv,nintv,arpol,ld)); |
1024 | 71400 | rho0 = rho; | |
1025 | 71400 | rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights); | |
1026 | |||
1027 | /* update x in the vector space */ | ||
1028 | 71400 | alpha = alp; | |
1029 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
71400 | PetscCall(MatAXPY(C,alpha,P,SAME_NONZERO_PATTERN)); /* x += alp*p */ |
1030 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
71400 | if (rho < tol*rho00) break; |
1031 | |||
1032 | /* finish the iteration in the polynomial space */ | ||
1033 | 71400 | bet = rho / rho0; | |
1034 | 71400 | sppol++; | |
1035 | 71400 | sappol++; | |
1036 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
71400 | PetscCall(Mat_AXPY_BLAS(sppol,nintv,1.0,rpol,ld,bet,ppol,ld)); |
1037 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
71400 | PetscCall(Mat_AXPY_BLAS(sappol,nintv,1.0,arpol,ld,bet,appol,ld)); |
1038 | |||
1039 | /* finish the iteration in the vector space */ | ||
1040 | 71400 | alpha = -alp0; | |
1041 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
71400 | PetscCall(MatAXPY(R,alpha,AP,SAME_NONZERO_PATTERN)); /* r -= alp0*ap */ |
1042 | 71400 | alpha = bet; | |
1043 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
71400 | PetscCall(MatAYPX(P,alpha,R,SAME_NONZERO_PATTERN)); /* p = r + bet*p */ |
1044 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
71400 | PetscCall(MatMatMult(A,R,MAT_REUSE_MATRIX,PETSC_DEFAULT,&W)); /* ap = A*r + bet*ap */ |
1045 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
71400 | PetscCall(MatAYPX(AP,alpha,W,SAME_NONZERO_PATTERN)); |
1046 | } | ||
1047 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
357 | PetscCall(PetscFree5(ppol,rpol,cpol,appol,arpol)); |
1048 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
51 | PetscFunctionReturn(PETSC_SUCCESS); |
1049 | } | ||
1050 | |||
1051 | /* | ||
1052 | Gateway to FILTLAN for evaluating C=p(A)*B | ||
1053 | */ | ||
1054 | 357 | static PetscErrorCode MatMatMult_FILTLAN(Mat A,Mat B,Mat C,void *pctx) | |
1055 | { | ||
1056 | 357 | ST st; | |
1057 | 357 | ST_FILTER *ctx; | |
1058 | 357 | FILTLAN_CTX filtlan; | |
1059 | 357 | PetscInt i,m1,m2,npoints; | |
1060 | |||
1061 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
357 | PetscFunctionBegin; |
1062 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
357 | PetscCall(MatShellGetContext(A,&st)); |
1063 | 357 | ctx = (ST_FILTER*)st->data; | |
1064 | 357 | filtlan = (FILTLAN_CTX)ctx->data; | |
1065 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
|
357 | npoints = (filtlan->filterInfo->filterType == 2)? 6: 4; |
1066 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
357 | if (ctx->nW) { /* check if work matrices must be resized */ |
1067 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
343 | PetscCall(MatGetSize(B,NULL,&m1)); |
1068 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
343 | PetscCall(MatGetSize(ctx->W[0],NULL,&m2)); |
1069 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
343 | if (m1!=m2) { |
1070 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
42 | PetscCall(MatDestroyMatrices(ctx->nW,&ctx->W)); |
1071 | 42 | ctx->nW = 0; | |
1072 | } | ||
1073 | } | ||
1074 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
357 | if (!ctx->nW) { /* allocate work matrices */ |
1075 | 56 | ctx->nW = 4; | |
1076 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
56 | PetscCall(PetscMalloc1(ctx->nW,&ctx->W)); |
1077 |
7/8✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 8 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 2 times.
|
280 | for (i=0;i<ctx->nW;i++) PetscCall(MatDuplicate(B,MAT_DO_NOT_COPY_VALUES,&ctx->W[i])); |
1078 | } | ||
1079 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
357 | PetscCall(FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProductBlock(ctx->T,B,ctx->W[0],filtlan->baseFilter,2*filtlan->baseDegree+2,filtlan->intervals,npoints-1,filtlan->opts->intervalWeights,ctx->polyDegree,st->work,C,ctx->W[1],ctx->W[2],ctx->W[3])); |
1080 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
357 | PetscCall(MatMatMult(ctx->T,ctx->W[0],MAT_REUSE_MATRIX,PETSC_DEFAULT,&C)); |
1081 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
51 | PetscFunctionReturn(PETSC_SUCCESS); |
1082 | } | ||
1083 | |||
1084 | /* | ||
1085 | FILTLAN function PolynomialFilterInterface::setFilter | ||
1086 | |||
1087 | Creates the shifted (and scaled) matrix and the base filter P(z). | ||
1088 | M is a shell matrix whose MatMult() applies the filter. | ||
1089 | */ | ||
1090 | 88 | static PetscErrorCode STComputeOperator_Filter_FILTLAN(ST st,Mat *G) | |
1091 | { | ||
1092 | 88 | ST_FILTER *ctx = (ST_FILTER*)st->data; | |
1093 | 88 | FILTLAN_CTX filtlan = (FILTLAN_CTX)ctx->data; | |
1094 | 88 | PetscInt i,npoints,n,m,N,M; | |
1095 | 88 | PetscReal frame2[4]; | |
1096 | 88 | PetscScalar alpha; | |
1097 | 88 | const PetscInt HighLowFlags[5] = { 1, -1, 0, -1, 1 }; | |
1098 | |||
1099 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
88 | PetscFunctionBegin; |
1100 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
88 | PetscCall(STSetWorkVecs(st,4)); |
1101 | 88 | filtlan->frame[0] = ctx->left; | |
1102 | 88 | filtlan->frame[1] = ctx->inta; | |
1103 | 88 | filtlan->frame[2] = ctx->intb; | |
1104 | 88 | filtlan->frame[3] = ctx->right; | |
1105 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
|
88 | if (filtlan->frame[0] == filtlan->frame[1]) { /* low pass filter, convert it to high pass filter */ |
1106 | /* T = frame[3]*eye(n) - A */ | ||
1107 | ✗ | PetscCall(MatDestroy(&ctx->T)); | |
1108 | ✗ | PetscCall(MatDuplicate(st->A[0],MAT_COPY_VALUES,&ctx->T)); | |
1109 | ✗ | PetscCall(MatScale(ctx->T,-1.0)); | |
1110 | ✗ | alpha = filtlan->frame[3]; | |
1111 | ✗ | PetscCall(MatShift(ctx->T,alpha)); | |
1112 | ✗ | for (i=0;i<4;i++) frame2[i] = filtlan->frame[3] - filtlan->frame[3-i]; | |
1113 | } else { /* it can be a mid-pass filter or a high-pass filter */ | ||
1114 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
|
88 | if (filtlan->frame[0] == 0.0) { |
1115 | ✗ | PetscCall(PetscObjectReference((PetscObject)st->A[0])); | |
1116 | ✗ | PetscCall(MatDestroy(&ctx->T)); | |
1117 | ✗ | ctx->T = st->A[0]; | |
1118 | ✗ | for (i=0;i<4;i++) frame2[i] = filtlan->frame[i]; | |
1119 | } else { | ||
1120 | /* T = A - frame[0]*eye(n) */ | ||
1121 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
88 | PetscCall(MatDestroy(&ctx->T)); |
1122 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
88 | PetscCall(MatDuplicate(st->A[0],MAT_COPY_VALUES,&ctx->T)); |
1123 | 88 | alpha = -filtlan->frame[0]; | |
1124 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
88 | PetscCall(MatShift(ctx->T,alpha)); |
1125 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
440 | for (i=0;i<4;i++) frame2[i] = filtlan->frame[i] - filtlan->frame[0]; |
1126 | } | ||
1127 | } | ||
1128 | |||
1129 | /* no need to recompute filter if the parameters did not change */ | ||
1130 |
3/4✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 10 times.
|
88 | if (st->state==ST_STATE_INITIAL || ctx->filtch) { |
1131 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
78 | PetscCall(FILTLAN_GetIntervals(filtlan->intervals,frame2,ctx->polyDegree,filtlan->baseDegree,filtlan->opts,filtlan->filterInfo)); |
1132 | /* translate the intervals back */ | ||
1133 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
|
78 | if (filtlan->frame[0] == filtlan->frame[1]) { /* low pass filter, convert it to high pass filter */ |
1134 | ✗ | for (i=0;i<4;i++) filtlan->intervals2[i] = filtlan->frame[3] - filtlan->intervals[3-i]; | |
1135 | } else { /* it can be a mid-pass filter or a high-pass filter */ | ||
1136 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
|
78 | if (filtlan->frame[0] == 0.0) { |
1137 | ✗ | for (i=0;i<6;i++) filtlan->intervals2[i] = filtlan->intervals[i]; | |
1138 | } else { | ||
1139 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
546 | for (i=0;i<6;i++) filtlan->intervals2[i] = filtlan->intervals[i] + filtlan->frame[0]; |
1140 | } | ||
1141 | } | ||
1142 | |||
1143 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
|
78 | npoints = (filtlan->filterInfo->filterType == 2)? 6: 4; |
1144 |
5/8✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
|
78 | PetscCall(PetscFree(filtlan->baseFilter)); |
1145 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
78 | PetscCall(PetscMalloc1((2*filtlan->baseDegree+2)*(npoints-1),&filtlan->baseFilter)); |
1146 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
78 | PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(filtlan->baseFilter,filtlan->intervals,npoints,HighLowFlags,filtlan->baseDegree)); |
1147 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
78 | PetscCall(PetscInfo(st,"Computed value of yLimit = %g\n",(double)filtlan->filterInfo->yLimit)); |
1148 | } | ||
1149 | 88 | ctx->filtch = PETSC_FALSE; | |
1150 | |||
1151 | /* create shell matrix*/ | ||
1152 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
88 | if (!*G) { |
1153 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
78 | PetscCall(MatGetSize(ctx->T,&N,&M)); |
1154 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
78 | PetscCall(MatGetLocalSize(ctx->T,&n,&m)); |
1155 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
78 | PetscCall(MatCreateShell(PetscObjectComm((PetscObject)st),n,m,N,M,st,G)); |
1156 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
78 | PetscCall(MatShellSetOperation(*G,MATOP_MULT,(PetscErrorCodeFn*)MatMult_FILTLAN)); |
1157 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
78 | PetscCall(MatShellSetMatProductOperation(*G,MATPRODUCT_AB,NULL,MatMatMult_FILTLAN,NULL,MATDENSE,MATDENSE)); |
1158 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
78 | PetscCall(MatShellSetMatProductOperation(*G,MATPRODUCT_AB,NULL,MatMatMult_FILTLAN,NULL,MATDENSECUDA,MATDENSECUDA)); |
1159 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
78 | PetscCall(MatShellSetMatProductOperation(*G,MATPRODUCT_AB,NULL,MatMatMult_FILTLAN,NULL,MATDENSEHIP,MATDENSEHIP)); |
1160 | } | ||
1161 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
16 | PetscFunctionReturn(PETSC_SUCCESS); |
1162 | } | ||
1163 | |||
1164 | 287 | static PetscErrorCode STFilterGetThreshold_Filter_FILTLAN(ST st,PetscReal *gamma) | |
1165 | { | ||
1166 | 287 | ST_FILTER *ctx = (ST_FILTER*)st->data; | |
1167 | 287 | FILTLAN_CTX filtlan = (FILTLAN_CTX)ctx->data; | |
1168 | |||
1169 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
287 | PetscFunctionBegin; |
1170 | 287 | *gamma = filtlan->filterInfo->yLimit; | |
1171 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
287 | PetscFunctionReturn(PETSC_SUCCESS); |
1172 | } | ||
1173 | |||
1174 | 68 | static PetscErrorCode STDestroy_Filter_FILTLAN(ST st) | |
1175 | { | ||
1176 | 68 | ST_FILTER *ctx = (ST_FILTER*)st->data; | |
1177 | 68 | FILTLAN_CTX filtlan = (FILTLAN_CTX)ctx->data; | |
1178 | |||
1179 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
68 | PetscFunctionBegin; |
1180 |
5/8✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
|
68 | PetscCall(PetscFree(filtlan->opts)); |
1181 |
5/8✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
|
68 | PetscCall(PetscFree(filtlan->filterInfo)); |
1182 |
5/8✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
|
68 | PetscCall(PetscFree(filtlan->baseFilter)); |
1183 |
6/8✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
|
68 | PetscCall(PetscFree(filtlan)); |
1184 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
12 | PetscFunctionReturn(PETSC_SUCCESS); |
1185 | } | ||
1186 | |||
1187 | 68 | PetscErrorCode STCreate_Filter_FILTLAN(ST st) | |
1188 | { | ||
1189 | 68 | ST_FILTER *ctx = (ST_FILTER*)st->data; | |
1190 | 68 | FILTLAN_CTX filtlan; | |
1191 | 68 | FILTLAN_IOP iop; | |
1192 | 68 | FILTLAN_PFI pfi; | |
1193 | |||
1194 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
68 | PetscFunctionBegin; |
1195 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
68 | PetscCall(PetscNew(&filtlan)); |
1196 | 68 | ctx->data = (void*)filtlan; | |
1197 | 68 | filtlan->baseDegree = 10; | |
1198 | |||
1199 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
68 | PetscCall(PetscNew(&iop)); |
1200 | 68 | filtlan->opts = iop; | |
1201 | 68 | iop->intervalWeights[0] = 100.0; | |
1202 | 68 | iop->intervalWeights[1] = 1.0; | |
1203 | 68 | iop->intervalWeights[2] = 1.0; | |
1204 | 68 | iop->intervalWeights[3] = 1.0; | |
1205 | 68 | iop->intervalWeights[4] = 100.0; | |
1206 | 68 | iop->transIntervalRatio = 0.6; | |
1207 | 68 | iop->reverseInterval = PETSC_FALSE; | |
1208 | 68 | iop->initialPlateau = 0.1; | |
1209 | 68 | iop->plateauShrinkRate = 1.5; | |
1210 | 68 | iop->initialShiftStep = 0.01; | |
1211 | 68 | iop->shiftStepExpanRate = 1.5; | |
1212 | 68 | iop->maxInnerIter = 30; | |
1213 | 68 | iop->yLimitTol = 0.0001; | |
1214 | 68 | iop->maxOuterIter = 50; | |
1215 | 68 | iop->numGridPoints = 200; | |
1216 | 68 | iop->yBottomLine = 0.001; | |
1217 | 68 | iop->yRippleLimit = 0.8; | |
1218 | |||
1219 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
68 | PetscCall(PetscNew(&pfi)); |
1220 | 68 | filtlan->filterInfo = pfi; | |
1221 | |||
1222 | 68 | ctx->computeoperator = STComputeOperator_Filter_FILTLAN; | |
1223 | 68 | ctx->getthreshold = STFilterGetThreshold_Filter_FILTLAN; | |
1224 | 68 | ctx->destroy = STDestroy_Filter_FILTLAN; | |
1225 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
68 | PetscFunctionReturn(PETSC_SUCCESS); |
1226 | } | ||
1227 |