| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* | ||
| 2 | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
| 3 | SLEPc - Scalable Library for Eigenvalue Problem Computations | ||
| 4 | Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain | ||
| 5 | |||
| 6 | This file is part of SLEPc. | ||
| 7 | SLEPc is distributed under a 2-clause BSD license (see LICENSE). | ||
| 8 | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
| 9 | */ | ||
| 10 | /* | ||
| 11 | This file is an adaptation of several subroutines from FILTLAN, the | ||
| 12 | Filtered Lanczos Package, authored by Haw-ren Fang and Yousef Saad. | ||
| 13 | |||
| 14 | More information at: | ||
| 15 | https://www-users.cs.umn.edu/~saad/software/filtlan | ||
| 16 | |||
| 17 | References: | ||
| 18 | |||
| 19 | [1] H. Fang and Y. Saad, "A filtered Lanczos procedure for extreme and interior | ||
| 20 | eigenvalue problems", SIAM J. Sci. Comput. 34(4):A2220-A2246, 2012. | ||
| 21 | */ | ||
| 22 | |||
| 23 | #include <slepc/private/stimpl.h> | ||
| 24 | #include "filter.h" | ||
| 25 | |||
| 26 | static PetscErrorCode FILTLAN_FilteredConjugateResidualPolynomial(PetscReal*,PetscReal*,PetscInt,PetscReal*,PetscInt,PetscReal*,PetscInt); | ||
| 27 | static PetscReal FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(PetscReal*,PetscInt,PetscReal*,PetscInt,PetscReal); | ||
| 28 | static PetscErrorCode FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(PetscInt,PetscReal,PetscReal,PetscReal*,PetscReal*,PetscReal*,PetscReal*); | ||
| 29 | |||
| 30 | /* //////////////////////////////////////////////////////////////////////////// | ||
| 31 | // Newton - Hermite Polynomial Interpolation | ||
| 32 | //////////////////////////////////////////////////////////////////////////// */ | ||
| 33 | |||
| 34 | /* | ||
| 35 | FILTLAN function NewtonPolynomial | ||
| 36 | |||
| 37 | build P(z) by Newton's divided differences in the form | ||
| 38 | P(z) = a(1) + a(2)*(z-x(1)) + a(3)*(z-x(1))*(z-x(2)) + ... + a(n)*(z-x(1))*...*(z-x(n-1)), | ||
| 39 | such that P(x(i)) = y(i) for i=1,...,n, where | ||
| 40 | x,y are input vectors of length n, and a is the output vector of length n | ||
| 41 | if x(i)==x(j) for some i!=j, then it is assumed that the derivative of P(z) is to be zero at x(i), | ||
| 42 | and the Hermite polynomial interpolation is applied | ||
| 43 | in general, if there are k x(i)'s with the same value x0, then | ||
| 44 | the j-th order derivative of P(z) is zero at z=x0 for j=1,...,k-1 | ||
| 45 | */ | ||
| 46 | 19496 | static inline PetscErrorCode FILTLAN_NewtonPolynomial(PetscInt n,PetscReal *x,PetscReal *y,PetscReal *sa,PetscReal *sf) | |
| 47 | { | ||
| 48 | 19496 | PetscReal d,*sx=x,*sy=y; | |
| 49 | 19496 | PetscInt j,k; | |
| 50 | |||
| 51 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
19496 | PetscFunctionBegin; |
| 52 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
19496 | PetscCall(PetscArraycpy(sf,sy,n)); |
| 53 | |||
| 54 | /* apply Newton's finite difference method */ | ||
| 55 | 19496 | sa[0] = sf[0]; | |
| 56 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
428912 | for (j=1;j<n;j++) { |
| 57 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
4912992 | for (k=n-1;k>=j;k--) { |
| 58 | 4503576 | d = sx[k]-sx[k-j]; | |
| 59 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
4503576 | if (PetscUnlikely(d == 0.0)) sf[k] = 0.0; /* assume that the derivative is 0.0 and apply the Hermite interpolation */ |
| 60 | 2167836 | else sf[k] = (sf[k]-sf[k-1]) / d; | |
| 61 | } | ||
| 62 | 409416 | sa[j] = sf[j]; | |
| 63 | } | ||
| 64 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
3544 | PetscFunctionReturn(PETSC_SUCCESS); |
| 65 | } | ||
| 66 | |||
| 67 | /* | ||
| 68 | FILTLAN function HermiteBaseFilterInChebyshevBasis | ||
| 69 | |||
| 70 | compute a base filter P(z) which is a continuous, piecewise polynomial P(z) expanded | ||
| 71 | in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials in each interval | ||
| 72 | |||
| 73 | The base filter P(z) equals P_j(z) for z in the j-th interval [intv(j), intv(j+1)), where | ||
| 74 | P_j(z) a Hermite interpolating polynomial | ||
| 75 | |||
| 76 | input: | ||
| 77 | intv is a vector which defines the intervals; the j-th interval is [intv(j), intv(j+1)) | ||
| 78 | HiLowFlags determines the shape of the base filter P(z) | ||
| 79 | Consider the j-th interval [intv(j), intv(j+1)] | ||
| 80 | HighLowFlag[j-1]==1, P(z)==1 for z in [intv(j), intv(j+1)] | ||
| 81 | ==0, P(z)==0 for z in [intv(j), intv(j+1)] | ||
| 82 | ==-1, [intv(j), intv(j+1)] is a transition interval; | ||
| 83 | P(intv(j)) and P(intv(j+1)) are defined such that P(z) is continuous | ||
| 84 | baseDeg is the degree of smoothness of the Hermite (piecewise polynomial) interpolation | ||
| 85 | to be precise, the i-th derivative of P(z) is zero, i.e. d^{i}P(z)/dz^i==0, at all interval | ||
| 86 | end points z=intv(j) for i=1,...,baseDeg | ||
| 87 | |||
| 88 | output: | ||
| 89 | P(z) expanded in a basis of `translated' (scale-and-shift) Chebyshev polynomials | ||
| 90 | to be precise, for z in the j-th interval [intv(j),intv(j+1)), P(z) equals | ||
| 91 | P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z), | ||
| 92 | where S_i(z) is the `translated' Chebyshev polynomial in that interval, | ||
| 93 | S_i((z-c)/h) = T_i(z), c = (intv(j)+intv(j+1))) / 2, h = (intv(j+1)-intv(j)) / 2, | ||
| 94 | with T_i(z) the Chebyshev polynomial of the first kind, | ||
| 95 | T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2 | ||
| 96 | the return matrix is the matrix of Chebyshev coefficients pp just described | ||
| 97 | |||
| 98 | note that the degree of P(z) in each interval is (at most) 2*baseDeg+1, with 2*baseDeg+2 coefficients | ||
| 99 | let n be the length of intv; then there are n-1 intervals | ||
| 100 | therefore the return matrix pp is of size (2*baseDeg+2)-by-(n-1) | ||
| 101 | */ | ||
| 102 | 9748 | static PetscErrorCode FILTLAN_HermiteBaseFilterInChebyshevBasis(PetscReal *baseFilter,PetscReal *intv,PetscInt npoints,const PetscInt *HighLowFlags,PetscInt baseDeg) | |
| 103 | { | ||
| 104 | 9748 | PetscInt m,ii,jj; | |
| 105 | 9748 | PetscReal flag,flag0,flag2,aa,bb,*px,*py,*sx,*sy,*pp,*qq,*sq,*sbf,*work,*currentPoint = intv; | |
| 106 | 9748 | const PetscInt *hilo = HighLowFlags; | |
| 107 | |||
| 108 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
9748 | PetscFunctionBegin; |
| 109 | 9748 | m = 2*baseDeg+2; | |
| 110 | 9748 | jj = npoints-1; /* jj is initialized as the number of intervals */ | |
| 111 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
9748 | PetscCall(PetscMalloc5(m,&px,m,&py,m,&pp,m,&qq,m,&work)); |
| 112 | sbf = baseFilter; | ||
| 113 | |||
| 114 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
58488 | while (jj--) { /* the main loop to compute the Chebyshev coefficients */ |
| 115 | |||
| 116 | 48740 | flag = (PetscReal)(*hilo++); /* get the flag of the current interval */ | |
| 117 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
48740 | if (flag == -1.0) { /* flag == -1 means that the current interval is a transition polynomial */ |
| 118 | |||
| 119 | 19496 | flag2 = (PetscReal)(*hilo); /* get flag2, the flag of the next interval */ | |
| 120 | 19496 | flag0 = 1.0-flag2; /* the flag of the previous interval is 1-flag2 */ | |
| 121 | |||
| 122 | /* two pointers for traversing x[] and y[] */ | ||
| 123 | 19496 | sx = px; | |
| 124 | 19496 | sy = py; | |
| 125 | |||
| 126 | /* find the current interval [aa,bb] */ | ||
| 127 | 19496 | aa = *currentPoint++; | |
| 128 | 19496 | bb = *currentPoint; | |
| 129 | |||
| 130 | /* now left-hand side */ | ||
| 131 | 19496 | ii = baseDeg+1; | |
| 132 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
233952 | while (ii--) { |
| 133 | 214456 | *sy++ = flag0; | |
| 134 | 214456 | *sx++ = aa; | |
| 135 | } | ||
| 136 | |||
| 137 | /* now right-hand side */ | ||
| 138 | ii = baseDeg+1; | ||
| 139 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
233952 | while (ii--) { |
| 140 | 214456 | *sy++ = flag2; | |
| 141 | 214456 | *sx++ = bb; | |
| 142 | } | ||
| 143 | |||
| 144 | /* build a Newton polynomial (indeed, the generalized Hermite interpolating polynomial) with x[] and y[] */ | ||
| 145 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
19496 | PetscCall(FILTLAN_NewtonPolynomial(m,px,py,pp,work)); |
| 146 | |||
| 147 | /* pp contains coefficients of the Newton polynomial P(z) in the current interval [aa,bb], where | ||
| 148 | P(z) = pp(1) + pp(2)*(z-px(1)) + pp(3)*(z-px(1))*(z-px(2)) + ... + pp(n)*(z-px(1))*...*(z-px(n-1)) */ | ||
| 149 | |||
| 150 | /* translate the Newton coefficients to the Chebyshev coefficients */ | ||
| 151 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
19496 | PetscCall(FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(m,aa,bb,pp,px,qq,work)); |
| 152 | /* qq contains coefficients of the polynomial in [aa,bb] in the `translated' Chebyshev basis */ | ||
| 153 | |||
| 154 | /* copy the Chebyshev coefficients to baseFilter | ||
| 155 | OCTAVE/MATLAB: B(:,j) = qq, where j = (npoints-1)-jj and B is the return matrix */ | ||
| 156 | 19496 | sq = qq; | |
| 157 | 19496 | ii = 2*baseDeg+2; | |
| 158 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
448408 | while (ii--) *sbf++ = *sq++; |
| 159 | |||
| 160 | } else { | ||
| 161 | |||
| 162 | /* a constant polynomial P(z)=flag, where either flag==0 or flag==1 | ||
| 163 | OCTAVE/MATLAB: B(1,j) = flag, where j = (npoints-1)-jj and B is the return matrix */ | ||
| 164 | 29244 | *sbf++ = flag; | |
| 165 | |||
| 166 | /* the other coefficients are all zero, since P(z) is a constant | ||
| 167 | OCTAVE/MATLAB: B(1,j) = 0, where j = (npoints-1)-jj and B is the return matrix */ | ||
| 168 | 29244 | ii = 2*baseDeg+1; | |
| 169 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
643368 | while (ii--) *sbf++ = 0.0; |
| 170 | |||
| 171 | /* for the next point */ | ||
| 172 | 29244 | currentPoint++; | |
| 173 | } | ||
| 174 | } | ||
| 175 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
9748 | PetscCall(PetscFree5(px,py,pp,qq,work)); |
| 176 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
1772 | PetscFunctionReturn(PETSC_SUCCESS); |
| 177 | } | ||
| 178 | |||
| 179 | /* //////////////////////////////////////////////////////////////////////////// | ||
| 180 | // Base Filter | ||
| 181 | //////////////////////////////////////////////////////////////////////////// */ | ||
| 182 | |||
| 183 | /* | ||
| 184 | FILTLAN function GetIntervals | ||
| 185 | |||
| 186 | this routine determines the intervals (including the transition one(s)) by an iterative process | ||
| 187 | |||
| 188 | frame is a vector consisting of 4 ordered elements: | ||
| 189 | [frame(1),frame(4)] is the interval which (tightly) contains all eigenvalues, and | ||
| 190 | [frame(2),frame(3)] is the interval in which the eigenvalues are sought | ||
| 191 | baseDeg is the left-and-right degree of the base filter for each interval | ||
| 192 | polyDeg is the (maximum possible) degree of s(z), with z*s(z) the polynomial filter | ||
| 193 | intv is the output; the j-th interval is [intv(j),intv(j+1)) | ||
| 194 | opts is a collection of interval options | ||
| 195 | |||
| 196 | the base filter P(z) is a piecewise polynomial from Hermite interpolation with degree baseDeg | ||
| 197 | at each end point of intervals | ||
| 198 | |||
| 199 | the polynomial filter Q(z) is in the form z*s(z), i.e. Q(0)==0, such that ||(1-P(z))-z*s(z)||_w is | ||
| 200 | minimized with s(z) a polynomial of degree up to polyDeg | ||
| 201 | |||
| 202 | the resulting polynomial filter Q(z) satisfies Q(x)>=Q(y) for x in [frame[1],frame[2]], and | ||
| 203 | y in [frame[0],frame[3]] but not in [frame[1],frame[2]] | ||
| 204 | |||
| 205 | the routine fills a PolynomialFilterInfo struct which gives some information of the polynomial filter | ||
| 206 | */ | ||
| 207 | 78 | static PetscErrorCode FILTLAN_GetIntervals(PetscReal *intervals,PetscReal *frame,PetscInt polyDeg,PetscInt baseDeg,FILTLAN_IOP opts,FILTLAN_PFI filterInfo) | |
| 208 | { | ||
| 209 | 78 | PetscReal intv[6],x,y,z1,z2,c,c1,c2,fc,fc2,halfPlateau,leftDelta,rightDelta,gridSize; | |
| 210 | 78 | PetscReal yLimit,ySummit,yLeftLimit,yRightLimit,bottom,qIndex,*baseFilter,*polyFilter; | |
| 211 | 78 | PetscReal yLimitGap=0.0,yLeftSummit=0.0,yLeftBottom=0.0,yRightSummit=0.0,yRightBottom=0.0; | |
| 212 | 78 | PetscInt i,ii,npoints,numIter,numLeftSteps=1,numRightSteps=1,numMoreLooked=0; | |
| 213 | 78 | PetscBool leftBoundaryMet=PETSC_FALSE,rightBoundaryMet=PETSC_FALSE,stepLeft,stepRight; | |
| 214 | 78 | const PetscReal a=frame[0],a1=frame[1],b1=frame[2],b=frame[3]; | |
| 215 | 78 | const PetscInt HighLowFlags[5] = { 1, -1, 0, -1, 1 }; /* if filterType is 1, only first 3 elements will be used */ | |
| 216 | 78 | const PetscInt numLookMore = 2*(PetscInt)(0.5+(PetscLogReal(2.0)/PetscLogReal(opts->shiftStepExpanRate))); | |
| 217 | |||
| 218 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
78 | PetscFunctionBegin; |
| 219 |
3/8✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 10 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 10 times.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
|
78 | PetscCheck(a<=a1 && a1<=b1 && b1<=b,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Values in the frame vector should be non-decreasing"); |
| 220 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
78 | PetscCheck(a1!=b1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"The range of wanted eigenvalues cannot be of size zero"); |
| 221 | 78 | filterInfo->filterType = 2; /* mid-pass filter, for interior eigenvalues */ | |
| 222 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
|
78 | if (b == b1) { |
| 223 | ✗ | PetscCheck(a!=a1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"A polynomial filter should not cover all eigenvalues"); | |
| 224 | ✗ | filterInfo->filterType = 1; /* high-pass filter, for largest eigenvalues */ | |
| 225 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
78 | } else PetscCheck(a!=a1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"filterType==3 for smallest eigenvalues should be pre-converted to filterType==1 for largest eigenvalues"); |
| 226 | |||
| 227 | /* the following recipe follows Yousef Saad (2005, 2006) with a few minor adaptations / enhancements */ | ||
| 228 | 78 | halfPlateau = 0.5*(b1-a1)*opts->initialPlateau; /* half length of the "plateau" of the (dual) base filter */ | |
| 229 | 78 | leftDelta = (b1-a1)*opts->initialShiftStep; /* initial left shift */ | |
| 230 | 78 | rightDelta = leftDelta; /* initial right shift */ | |
| 231 | 78 | opts->numGridPoints = PetscMax(opts->numGridPoints,(PetscInt)(2.0*(b-a)/halfPlateau)); | |
| 232 | 78 | gridSize = (b-a) / (PetscReal)opts->numGridPoints; | |
| 233 | |||
| 234 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
546 | for (i=0;i<6;i++) intv[i] = 0.0; |
| 235 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
78 | if (filterInfo->filterType == 2) { /* for interior eigenvalues */ |
| 236 | 78 | npoints = 6; | |
| 237 | 78 | intv[0] = a; | |
| 238 | 78 | intv[5] = b; | |
| 239 | /* intv[1], intv[2], intv[3], and intv[4] to be determined */ | ||
| 240 | } else { /* filterType == 1 (or 3 with conversion), for extreme eigenvalues */ | ||
| 241 | ✗ | npoints = 4; | |
| 242 | ✗ | intv[0] = a; | |
| 243 | ✗ | intv[3] = b; | |
| 244 | /* intv[1], and intv[2] to be determined */ | ||
| 245 | } | ||
| 246 | 78 | z1 = a1 - leftDelta; | |
| 247 | 78 | z2 = b1 + rightDelta; | |
| 248 | 78 | filterInfo->filterOK = 0; /* not yet found any OK filter */ | |
| 249 | |||
| 250 | /* allocate matrices */ | ||
| 251 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
78 | PetscCall(PetscMalloc2((polyDeg+2)*(npoints-1),&polyFilter,(2*baseDeg+2)*(npoints-1),&baseFilter)); |
| 252 | |||
| 253 | /* initialize the intervals, mainly for the case opts->maxOuterIter == 0 */ | ||
| 254 | 78 | intervals[0] = intv[0]; | |
| 255 | 78 | intervals[3] = intv[3]; | |
| 256 | 78 | intervals[5] = intv[5]; | |
| 257 | 78 | intervals[1] = z1; | |
| 258 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
78 | if (filterInfo->filterType == 2) { /* a mid-pass filter for interior eigenvalues */ |
| 259 | 78 | intervals[4] = z2; | |
| 260 | 78 | c = (a1+b1) / 2.0; | |
| 261 | 78 | intervals[2] = c - halfPlateau; | |
| 262 | 78 | intervals[3] = c + halfPlateau; | |
| 263 | } else { /* filterType == 1 (or 3 with conversion) for extreme eigenvalues */ | ||
| 264 | ✗ | intervals[2] = z1 + (b1-z1)*opts->transIntervalRatio; | |
| 265 | } | ||
| 266 | |||
| 267 | /* the main loop */ | ||
| 268 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
790 | for (numIter=1;numIter<=opts->maxOuterIter;numIter++) { |
| 269 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
790 | if (z1 <= a) { /* outer loop updates z1 and z2 */ |
| 270 | 180 | z1 = a; | |
| 271 | 180 | leftBoundaryMet = PETSC_TRUE; | |
| 272 | } | ||
| 273 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
790 | if (filterInfo->filterType == 2) { /* a <= z1 < (a1) */ |
| 274 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
|
790 | if (z2 >= b) { /* a mid-pass filter for interior eigenvalues */ |
| 275 | ✗ | z2 = b; | |
| 276 | ✗ | rightBoundaryMet = PETSC_TRUE; | |
| 277 | } | ||
| 278 | /* a <= z1 < c-h < c+h < z2 <= b, where h is halfPlateau */ | ||
| 279 | /* [z1, c-h] and [c+h, z2] are transition interval */ | ||
| 280 | 790 | intv[1] = z1; | |
| 281 | 790 | intv[4] = z2; | |
| 282 | 790 | c1 = z1 + halfPlateau; | |
| 283 | 790 | intv[2] = z1; /* i.e. c1 - halfPlateau */ | |
| 284 | 790 | intv[3] = c1 + halfPlateau; | |
| 285 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
790 | PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg)); |
| 286 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
790 | PetscCall(FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg)); |
| 287 | /* fc1 = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1); */ | ||
| 288 | 790 | c2 = z2 - halfPlateau; | |
| 289 | 790 | intv[2] = c2 - halfPlateau; | |
| 290 | 790 | intv[3] = z2; /* i.e. c2 + halfPlateau */ | |
| 291 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
790 | PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg)); |
| 292 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
790 | PetscCall(FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg)); |
| 293 | 790 | fc2 = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1); | |
| 294 | 790 | yLimitGap = PETSC_MAX_REAL; | |
| 295 | 790 | ii = opts->maxInnerIter; | |
| 296 |
3/4✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
|
8880 | while (ii-- && !(yLimitGap <= opts->yLimitTol)) { |
| 297 | /* recursive bisection to get c such that p(a1) are p(b1) approximately the same */ | ||
| 298 | 8090 | c = (c1+c2) / 2.0; | |
| 299 | 8090 | intv[2] = c - halfPlateau; | |
| 300 | 8090 | intv[3] = c + halfPlateau; | |
| 301 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
8090 | PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg)); |
| 302 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
8090 | PetscCall(FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg)); |
| 303 | 8090 | fc = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1); | |
| 304 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
8090 | if (fc*fc2 < 0.0) { |
| 305 | c1 = c; | ||
| 306 | /* fc1 = fc; */ | ||
| 307 | } else { | ||
| 308 | 4447 | c2 = c; | |
| 309 | 4447 | fc2 = fc; | |
| 310 | } | ||
| 311 | 8090 | yLimitGap = PetscAbsReal(fc); | |
| 312 | } | ||
| 313 | } else { /* filterType == 1 (or 3 with conversion) for extreme eigenvalues */ | ||
| 314 | ✗ | intv[1] = z1; | |
| 315 | ✗ | intv[2] = z1 + (b1-z1)*opts->transIntervalRatio; | |
| 316 | ✗ | PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,4,HighLowFlags,baseDeg)); | |
| 317 | ✗ | PetscCall(FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,4,opts->intervalWeights,polyDeg)); | |
| 318 | } | ||
| 319 | /* polyFilter contains the coefficients of the polynomial filter which approximates phi(x) | ||
| 320 | expanded in the `translated' Chebyshev basis */ | ||
| 321 | /* psi(x) = 1.0 - phi(x) is the dual base filter approximated by a polynomial in the form x*p(x) */ | ||
| 322 | 790 | yLeftLimit = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1); | |
| 323 | 790 | yRightLimit = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1); | |
| 324 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
790 | yLimit = (yLeftLimit < yRightLimit) ? yLeftLimit : yRightLimit; |
| 325 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
790 | ySummit = (yLeftLimit > yRightLimit) ? yLeftLimit : yRightLimit; |
| 326 | x = a1; | ||
| 327 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
31600 | while ((x+=gridSize) < b1) { |
| 328 | 30810 | y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x); | |
| 329 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
30810 | if (y < yLimit) yLimit = y; |
| 330 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
30810 | if (y > ySummit) ySummit = y; |
| 331 | } | ||
| 332 | /* now yLimit is the minimum of x*p(x) for x in [a1, b1] */ | ||
| 333 | 790 | stepLeft = PETSC_FALSE; | |
| 334 | 790 | stepRight = PETSC_FALSE; | |
| 335 |
6/6✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
✓ Branch 4 taken 10 times.
✓ Branch 5 taken 10 times.
|
790 | if ((yLimit < yLeftLimit && yLimit < yRightLimit) || yLimit < opts->yBottomLine) { |
| 336 | /* very bad, step to see what will happen */ | ||
| 337 | 250 | stepLeft = PETSC_TRUE; | |
| 338 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
250 | if (filterInfo->filterType == 2) stepRight = PETSC_TRUE; |
| 339 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
540 | } else if (filterInfo->filterType == 2) { |
| 340 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
540 | if (yLeftLimit < yRightLimit) { |
| 341 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
|
302 | if (yRightLimit-yLeftLimit > opts->yLimitTol) stepLeft = PETSC_TRUE; |
| 342 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
|
238 | } else if (yLeftLimit-yRightLimit > opts->yLimitTol) stepRight = PETSC_TRUE; |
| 343 | } | ||
| 344 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
790 | if (!stepLeft) { |
| 345 | yLeftBottom = yLeftLimit; | ||
| 346 | x = a1; | ||
| 347 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
28144 | while ((x-=gridSize) >= a) { |
| 348 | 28094 | y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x); | |
| 349 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
28094 | if (y < yLeftBottom) yLeftBottom = y; |
| 350 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 2 times.
|
490 | else if (y > yLeftBottom) break; |
| 351 | } | ||
| 352 | yLeftSummit = yLeftBottom; | ||
| 353 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
1670316 | while ((x-=gridSize) >= a) { |
| 354 | 1669776 | y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x); | |
| 355 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
1669776 | if (y > yLeftSummit) { |
| 356 | 23866 | yLeftSummit = y; | |
| 357 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
23866 | if (yLeftSummit > yLimit*opts->yRippleLimit) { |
| 358 | stepLeft = PETSC_TRUE; | ||
| 359 | break; | ||
| 360 | } | ||
| 361 | } | ||
| 362 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
1669776 | if (y < yLeftBottom) yLeftBottom = y; |
| 363 | } | ||
| 364 | } | ||
| 365 |
3/4✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
|
790 | if (filterInfo->filterType == 2 && !stepRight) { |
| 366 | yRightBottom = yRightLimit; | ||
| 367 | x = b1; | ||
| 368 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
29748 | while ((x+=gridSize) <= b) { |
| 369 | 29748 | y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x); | |
| 370 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
29748 | if (y < yRightBottom) yRightBottom = y; |
| 371 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 2 times.
|
540 | else if (y > yRightBottom) break; |
| 372 | } | ||
| 373 | yRightSummit = yRightBottom; | ||
| 374 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
1328372 | while ((x+=gridSize) <= b) { |
| 375 | 1327832 | y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x); | |
| 376 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
1327832 | if (y > yRightSummit) { |
| 377 | 23970 | yRightSummit = y; | |
| 378 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
23970 | if (yRightSummit > yLimit*opts->yRippleLimit) { |
| 379 | stepRight = PETSC_TRUE; | ||
| 380 | break; | ||
| 381 | } | ||
| 382 | } | ||
| 383 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
1327832 | if (y < yRightBottom) yRightBottom = y; |
| 384 | } | ||
| 385 | } | ||
| 386 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
790 | if (!stepLeft && !stepRight) { |
| 387 |
3/4✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
|
540 | if (filterInfo->filterType == 2) bottom = PetscMin(yLeftBottom,yRightBottom); |
| 388 | else bottom = yLeftBottom; | ||
| 389 | 540 | qIndex = 1.0 - (yLimit-bottom) / (ySummit-bottom); | |
| 390 |
4/4✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
|
540 | if (filterInfo->filterOK == 0 || filterInfo->filterQualityIndex < qIndex) { |
| 391 | /* found the first OK filter or a better filter */ | ||
| 392 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
1596 | for (i=0;i<6;i++) intervals[i] = intv[i]; |
| 393 | 228 | filterInfo->filterOK = 1; | |
| 394 | 228 | filterInfo->filterQualityIndex = qIndex; | |
| 395 | 228 | filterInfo->numIter = numIter; | |
| 396 | 228 | filterInfo->yLimit = yLimit; | |
| 397 | 228 | filterInfo->ySummit = ySummit; | |
| 398 | 228 | filterInfo->numLeftSteps = numLeftSteps; | |
| 399 | 228 | filterInfo->yLeftSummit = yLeftSummit; | |
| 400 | 228 | filterInfo->yLeftBottom = yLeftBottom; | |
| 401 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
228 | if (filterInfo->filterType == 2) { |
| 402 | 228 | filterInfo->yLimitGap = yLimitGap; | |
| 403 | 228 | filterInfo->numRightSteps = numRightSteps; | |
| 404 | 228 | filterInfo->yRightSummit = yRightSummit; | |
| 405 | 228 | filterInfo->yRightBottom = yRightBottom; | |
| 406 | } | ||
| 407 | numMoreLooked = 0; | ||
| 408 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
312 | } else if (++numMoreLooked == numLookMore) { |
| 409 | /* filter has been optimal */ | ||
| 410 | 78 | filterInfo->filterOK = 2; | |
| 411 | 78 | break; | |
| 412 | } | ||
| 413 | /* try stepping further to see whether it can improve */ | ||
| 414 | 462 | stepLeft = PETSC_TRUE; | |
| 415 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
462 | if (filterInfo->filterType == 2) stepRight = PETSC_TRUE; |
| 416 | } | ||
| 417 | /* check whether we can really "step" */ | ||
| 418 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
712 | if (leftBoundaryMet) { |
| 419 |
2/4✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 10 times.
✗ Branch 3 not taken.
|
140 | if (filterInfo->filterType == 1 || rightBoundaryMet) break; /* cannot step further, so break the loop */ |
| 420 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
140 | if (stepLeft) { |
| 421 | /* cannot step left, so try stepping right */ | ||
| 422 | 140 | stepLeft = PETSC_FALSE; | |
| 423 | 140 | stepRight = PETSC_TRUE; | |
| 424 | } | ||
| 425 | } | ||
| 426 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
712 | if (rightBoundaryMet && stepRight) { |
| 427 | /* cannot step right, so try stepping left */ | ||
| 428 | stepRight = PETSC_FALSE; | ||
| 429 | stepLeft = PETSC_TRUE; | ||
| 430 | } | ||
| 431 | /* now "step" */ | ||
| 432 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
712 | if (stepLeft) { |
| 433 | 572 | numLeftSteps++; | |
| 434 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
572 | if (filterInfo->filterType == 2) leftDelta *= opts->shiftStepExpanRate; /* expand the step for faster convergence */ |
| 435 | 572 | z1 -= leftDelta; | |
| 436 | } | ||
| 437 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
712 | if (stepRight) { |
| 438 | 712 | numRightSteps++; | |
| 439 | 712 | rightDelta *= opts->shiftStepExpanRate; /* expand the step for faster convergence */ | |
| 440 | 712 | z2 += rightDelta; | |
| 441 | } | ||
| 442 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
712 | if (filterInfo->filterType == 2) { |
| 443 | /* shrink the "plateau" of the (dual) base filter */ | ||
| 444 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
712 | if (stepLeft && stepRight) halfPlateau /= opts->plateauShrinkRate; |
| 445 | 140 | else halfPlateau /= PetscSqrtReal(opts->plateauShrinkRate); | |
| 446 | } | ||
| 447 | } | ||
| 448 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
78 | PetscCheck(filterInfo->filterOK,PETSC_COMM_SELF,PETSC_ERR_CONV_FAILED,"STFILTER cannot get the filter specified; please adjust your filter parameters (e.g. increasing the polynomial degree)"); |
| 449 | |||
| 450 | 78 | filterInfo->totalNumIter = numIter; | |
| 451 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
78 | PetscCall(PetscFree2(polyFilter,baseFilter)); |
| 452 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
14 | PetscFunctionReturn(PETSC_SUCCESS); |
| 453 | } | ||
| 454 | |||
| 455 | /* //////////////////////////////////////////////////////////////////////////// | ||
| 456 | // Chebyshev Polynomials | ||
| 457 | //////////////////////////////////////////////////////////////////////////// */ | ||
| 458 | |||
| 459 | /* | ||
| 460 | FILTLAN function ExpandNewtonPolynomialInChebyshevBasis | ||
| 461 | |||
| 462 | translate the coefficients of a Newton polynomial to the coefficients | ||
| 463 | in a basis of the `translated' (scale-and-shift) Chebyshev polynomials | ||
| 464 | |||
| 465 | input: | ||
| 466 | a Newton polynomial defined by vectors a and x: | ||
| 467 | P(z) = a(1) + a(2)*(z-x(1)) + a(3)*(z-x(1))*(z-x(2)) + ... + a(n)*(z-x(1))*...*(z-x(n-1)) | ||
| 468 | the interval [aa,bb] defines the `translated' Chebyshev polynomials S_i(z) = T_i((z-c)/h), | ||
| 469 | where c=(aa+bb)/2 and h=(bb-aa)/2, and T_i is the Chebyshev polynomial of the first kind | ||
| 470 | note that T_i is defined by T_0(z)=1, T_1(z)=z, and T_i(z)=2*z*T_{i-1}(z)+T_{i-2}(z) for i>=2 | ||
| 471 | |||
| 472 | output: | ||
| 473 | a vector q containing the Chebyshev coefficients: | ||
| 474 | P(z) = q(1)*S_0(z) + q(2)*S_1(z) + ... + q(n)*S_{n-1}(z) | ||
| 475 | */ | ||
| 476 | 19496 | static inline PetscErrorCode FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(PetscInt n,PetscReal aa,PetscReal bb,PetscReal *a,PetscReal *x,PetscReal *q,PetscReal *q2) | |
| 477 | { | ||
| 478 | 19496 | PetscInt m,mm; | |
| 479 | 19496 | PetscReal *sa,*sx,*sq,*sq2,c,c2,h,h2; | |
| 480 | |||
| 481 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
19496 | PetscFunctionBegin; |
| 482 | 19496 | sa = a+n; /* pointers for traversing a and x */ | |
| 483 | 19496 | sx = x+n-1; | |
| 484 | 19496 | *q = *--sa; /* set q[0] = a(n) */ | |
| 485 | |||
| 486 | 19496 | c = (aa+bb)/2.0; | |
| 487 | 19496 | h = (bb-aa)/2.0; | |
| 488 | 19496 | h2 = h/2.0; | |
| 489 | |||
| 490 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
428912 | for (m=1;m<=n-1;m++) { /* the main loop for translation */ |
| 491 | |||
| 492 | /* compute q2[0:m-1] = (c-x[n-m-1])*q[0:m-1] */ | ||
| 493 | 409416 | mm = m; | |
| 494 | 409416 | sq = q; | |
| 495 | 409416 | sq2 = q2; | |
| 496 | 409416 | c2 = c-(*--sx); | |
| 497 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
4912992 | while (mm--) *(sq2++) = c2*(*sq++); |
| 498 | 409416 | *sq2 = 0.0; /* q2[m] = 0.0 */ | |
| 499 | 409416 | *(q2+1) += h*(*q); /* q2[1] = q2[1] + h*q[0] */ | |
| 500 | |||
| 501 | /* compute q2[0:m-2] = q2[0:m-2] + h2*q[1:m-1] */ | ||
| 502 | 409416 | mm = m-1; | |
| 503 | 409416 | sq2 = q2; | |
| 504 | 409416 | sq = q+1; | |
| 505 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
4503576 | while (mm--) *(sq2++) += h2*(*sq++); |
| 506 | |||
| 507 | /* compute q2[2:m] = q2[2:m] + h2*q[1:m-1] */ | ||
| 508 | 409416 | mm = m-1; | |
| 509 | 409416 | sq2 = q2+2; | |
| 510 | 409416 | sq = q+1; | |
| 511 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
4503576 | while (mm--) *(sq2++) += h2*(*sq++); |
| 512 | |||
| 513 | /* compute q[0:m] = q2[0:m] */ | ||
| 514 | 409416 | mm = m+1; | |
| 515 | 409416 | sq2 = q2; | |
| 516 | 409416 | sq = q; | |
| 517 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
5322408 | while (mm--) *sq++ = *sq2++; |
| 518 | 409416 | *q += (*--sa); /* q[0] = q[0] + p[n-m-1] */ | |
| 519 | } | ||
| 520 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
19496 | PetscFunctionReturn(PETSC_SUCCESS); |
| 521 | } | ||
| 522 | |||
| 523 | /* | ||
| 524 | FILTLAN function PolynomialEvaluationInChebyshevBasis | ||
| 525 | |||
| 526 | evaluate P(z) at z=z0, where P(z) is a polynomial expanded in a basis of | ||
| 527 | the `translated' (i.e. scale-and-shift) Chebyshev polynomials | ||
| 528 | |||
| 529 | input: | ||
| 530 | c is a vector of Chebyshev coefficients which defines the polynomial | ||
| 531 | P(z) = c(1)*S_0(z) + c(2)*S_1(z) + ... + c(n)*S_{n-1}(z), | ||
| 532 | where S_i is the `translated' Chebyshev polynomial S_i((z-c)/h) = T_i(z), with | ||
| 533 | c = (intv(j)+intv(j+1)) / 2, h = (intv(j+1)-intv(j)) / 2 | ||
| 534 | note that T_i(z) is the Chebyshev polynomial of the first kind, | ||
| 535 | T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2 | ||
| 536 | |||
| 537 | output: | ||
| 538 | the evaluated value of P(z) at z=z0 | ||
| 539 | */ | ||
| 540 | 3105600 | static inline PetscReal FILTLAN_PolynomialEvaluationInChebyshevBasis(PetscReal *pp,PetscInt m,PetscInt idx,PetscReal z0,PetscReal aa,PetscReal bb) | |
| 541 | { | ||
| 542 | 3105600 | PetscInt ii,deg1; | |
| 543 | 3105600 | PetscReal y,zz,t0,t1,t2,*sc; | |
| 544 | |||
| 545 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
3105600 | PetscFunctionBegin; |
| 546 | 3105600 | deg1 = m; | |
| 547 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
3105600 | if (aa==-1.0 && bb==1.0) zz = z0; /* treat it as a special case to reduce rounding errors */ |
| 548 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
3105600 | else zz = (aa==bb)? 0.0 : -1.0+2.0*(z0-aa)/(bb-aa); |
| 549 | |||
| 550 | /* compute y = P(z0), where we utilize the Chebyshev recursion */ | ||
| 551 | 3105600 | sc = pp+(idx-1)*m; /* sc points to column idx of pp */ | |
| 552 | 3105600 | y = *sc++; | |
| 553 | 3105600 | t1 = 1.0; t2 = zz; | |
| 554 | 3105600 | ii = deg1-1; | |
| 555 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
616263700 | while (ii--) { |
| 556 | /* Chebyshev recursion: T_0(zz)=1, T_1(zz)=zz, and T_{i+1}(zz) = 2*zz*T_i(zz) + T_{i-1}(zz) for i>=2 | ||
| 557 | the values of T_{i+1}(zz), T_i(zz), T_{i-1}(zz) are stored in t0, t1, t2, respectively */ | ||
| 558 | 613158100 | t0 = 2*zz*t1 - t2; | |
| 559 | /* it also works for the base case / the first iteration, where t0 equals 2*zz*1-zz == zz which is T_1(zz) */ | ||
| 560 | 613158100 | t2 = t1; | |
| 561 | 613158100 | t1 = t0; | |
| 562 | 613158100 | y += (*sc++)*t0; | |
| 563 | } | ||
| 564 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
3105600 | PetscFunctionReturn(y); |
| 565 | } | ||
| 566 | |||
| 567 | #define basisTranslated PETSC_TRUE | ||
| 568 | /* | ||
| 569 | FILTLAN function PiecewisePolynomialEvaluationInChebyshevBasis | ||
| 570 | |||
| 571 | evaluate P(z) at z=z0, where P(z) is a piecewise polynomial expanded | ||
| 572 | in a basis of the (optionally translated, i.e. scale-and-shift) Chebyshev polynomials for each interval | ||
| 573 | |||
| 574 | input: | ||
| 575 | intv is a vector which defines the intervals; the j-th interval is [intv(j), intv(j+1)) | ||
| 576 | pp is a matrix of Chebyshev coefficients which defines a piecewise polynomial P(z) | ||
| 577 | in a basis of the `translated' Chebyshev polynomials in each interval | ||
| 578 | the polynomial P_j(z) in the j-th interval, i.e. when z is in [intv(j), intv(j+1)), is defined by the j-th column of pp: | ||
| 579 | if basisTranslated == false, then | ||
| 580 | P_j(z) = pp(1,j)*T_0(z) + pp(2,j)*T_1(z) + ... + pp(n,j)*T_{n-1}(z), | ||
| 581 | where T_i(z) is the Chebyshev polynomial of the first kind, | ||
| 582 | T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2 | ||
| 583 | if basisTranslated == true, then | ||
| 584 | P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z), | ||
| 585 | where S_i is the `translated' Chebyshev polynomial S_i((z-c)/h) = T_i(z), with | ||
| 586 | c = (intv(j)+intv(j+1)) / 2, h = (intv(j+1)-intv(j)) / 2 | ||
| 587 | |||
| 588 | output: | ||
| 589 | the evaluated value of P(z) at z=z0 | ||
| 590 | |||
| 591 | note that if z0 falls below the first interval, then the polynomial in the first interval will be used to evaluate P(z0) | ||
| 592 | if z0 flies over the last interval, then the polynomial in the last interval will be used to evaluate P(z0) | ||
| 593 | */ | ||
| 594 | 3105600 | static inline PetscReal FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(PetscReal *pp,PetscInt m,PetscReal *intv,PetscInt npoints,PetscReal z0) | |
| 595 | { | ||
| 596 | 3105600 | PetscReal *sintv,aa,bb,resul; | |
| 597 | 3105600 | PetscInt idx; | |
| 598 | |||
| 599 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
3105600 | PetscFunctionBegin; |
| 600 | /* determine the interval which contains z0 */ | ||
| 601 | 3105600 | sintv = &intv[1]; | |
| 602 | 3105600 | idx = 1; | |
| 603 |
3/4✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
|
3105600 | if (npoints>2 && z0 >= *sintv) { |
| 604 | 1414756 | sintv++; | |
| 605 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
5525928 | while (++idx < npoints-1) { |
| 606 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
4178064 | if (z0 < *sintv) break; |
| 607 | 4111172 | sintv++; | |
| 608 | } | ||
| 609 | } | ||
| 610 | /* idx==1 if npoints<=2; otherwise idx satisfies: | ||
| 611 | intv(idx) <= z0 < intv(idx+1), if 2 <= idx <= npoints-2 | ||
| 612 | z0 < intv(idx+1), if idx == 1 | ||
| 613 | intv(idx) <= z0, if idx == npoints-1 | ||
| 614 | in addition, sintv points to &intv(idx+1) */ | ||
| 615 | 3105600 | if (basisTranslated) { | |
| 616 | /* the basis consists of `translated' Chebyshev polynomials */ | ||
| 617 | /* find the interval of concern, [aa,bb] */ | ||
| 618 | 3105600 | aa = *(sintv-1); | |
| 619 | 3105600 | bb = *sintv; | |
| 620 | 3105600 | resul = FILTLAN_PolynomialEvaluationInChebyshevBasis(pp,m,idx,z0,aa,bb); | |
| 621 | } else { | ||
| 622 | /* the basis consists of standard Chebyshev polynomials, with interval [-1.0,1.0] for integration */ | ||
| 623 | resul = FILTLAN_PolynomialEvaluationInChebyshevBasis(pp,m,idx,z0,-1.0,1.0); | ||
| 624 | } | ||
| 625 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
3105600 | PetscFunctionReturn(resul); |
| 626 | } | ||
| 627 | |||
| 628 | /* | ||
| 629 | FILTLAN function PiecewisePolynomialInnerProductInChebyshevBasis | ||
| 630 | |||
| 631 | compute the weighted inner product of two piecewise polynomials expanded | ||
| 632 | in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials for each interval | ||
| 633 | |||
| 634 | pp and qq are two matrices of Chebyshev coefficients which define the piecewise polynomials P(z) and Q(z), respectively | ||
| 635 | for z in the j-th interval, P(z) equals | ||
| 636 | P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z), | ||
| 637 | and Q(z) equals | ||
| 638 | Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(n,j)*S_{n-1}(z), | ||
| 639 | where S_i(z) is the `translated' Chebyshev polynomial in that interval, | ||
| 640 | S_i((z-c)/h) = T_i(z), c = (aa+bb)) / 2, h = (bb-aa) / 2, | ||
| 641 | with T_i(z) the Chebyshev polynomial of the first kind | ||
| 642 | T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2 | ||
| 643 | |||
| 644 | the (scaled) j-th interval inner product is defined by | ||
| 645 | <P_j,Q_j> = (Pi/2)*(pp(1,j)*qq(1,j) + sum_{k} pp(k,j)*qq(k,j)), | ||
| 646 | which comes from the property | ||
| 647 | <T_0,T_0>=pi, <T_i,T_i>=pi/2 for i>=1, and <T_i,T_j>=0 for i!=j | ||
| 648 | |||
| 649 | the weighted inner product is <P,Q> = sum_{j} intervalWeights(j)*<P_j,Q_j>, | ||
| 650 | which is the return value | ||
| 651 | |||
| 652 | note that for unit weights, pass an empty vector of intervalWeights (i.e. of length 0) | ||
| 653 | */ | ||
| 654 | 6391605 | static inline PetscReal FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(PetscReal *pp,PetscInt prows,PetscInt pcols,PetscInt ldp,PetscReal *qq,PetscInt qrows,PetscInt qcols,PetscInt ldq,const PetscReal *intervalWeights) | |
| 655 | { | ||
| 656 | 6391605 | PetscInt nintv,deg1,i,k; | |
| 657 | 6391605 | PetscReal *sp,*sq,ans=0.0,ans2; | |
| 658 | |||
| 659 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
6391605 | PetscFunctionBegin; |
| 660 | 6391605 | deg1 = PetscMin(prows,qrows); /* number of effective coefficients, one more than the effective polynomial degree */ | |
| 661 |
2/14✓ Branch 0 taken 8 times.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
|
6391605 | if (PetscUnlikely(!deg1)) PetscFunctionReturn(0.0); |
| 662 | 6391605 | nintv = PetscMin(pcols,qcols); /* number of intervals */ | |
| 663 | |||
| 664 | /* scaled by intervalWeights(i) in the i-th interval (we assume intervalWeights[] are always provided). | ||
| 665 | compute ans = sum_{i=1,...,nintv} intervalWeights(i)*[ pp(1,i)*qq(1,i) + sum_{k=1,...,deg} pp(k,i)*qq(k,i) ] */ | ||
| 666 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
38349630 | for (i=0;i<nintv;i++) { /* compute ans2 = pp(1,i)*qq(1,i) + sum_{k=1,...,deg} pp(k,i)*qq(k,i) */ |
| 667 | 31958025 | sp = pp+i*ldp; | |
| 668 | 31958025 | sq = qq+i*ldq; | |
| 669 | 31958025 | ans2 = (*sp) * (*sq); /* the first term pp(1,i)*qq(1,i) is being added twice */ | |
| 670 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
1889817825 | for (k=0;k<deg1;k++) ans2 += (*sp++)*(*sq++); /* add pp(k,i)*qq(k,i) */ |
| 671 | 31958025 | ans += ans2*intervalWeights[i]; /* compute ans += ans2*intervalWeights(i) */ | |
| 672 | } | ||
| 673 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
6391605 | PetscFunctionReturn(ans*PETSC_PI/2.0); |
| 674 | } | ||
| 675 | |||
| 676 | /* | ||
| 677 | FILTLAN function PiecewisePolynomialInChebyshevBasisMultiplyX | ||
| 678 | |||
| 679 | compute Q(z) = z*P(z), where P(z) and Q(z) are piecewise polynomials expanded | ||
| 680 | in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials for each interval | ||
| 681 | |||
| 682 | P(z) and Q(z) are stored as matrices of Chebyshev coefficients pp and qq, respectively | ||
| 683 | |||
| 684 | For z in the j-th interval, P(z) equals | ||
| 685 | P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z), | ||
| 686 | and Q(z) equals | ||
| 687 | Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(n,j)*S_{n-1}(z), | ||
| 688 | where S_i(z) is the `translated' Chebyshev polynomial in that interval, | ||
| 689 | S_i((z-c)/h) = T_i(z), c = (intv(j)+intv(j+1))) / 2, h = (intv(j+1)-intv(j)) / 2, | ||
| 690 | with T_i(z) the Chebyshev polynomial of the first kind | ||
| 691 | T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2 | ||
| 692 | |||
| 693 | the returned matrix is qq which represents Q(z) = z*P(z) | ||
| 694 | */ | ||
| 695 | 2134605 | static inline PetscErrorCode FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(PetscReal *pp,PetscInt deg1,PetscInt ldp,PetscReal *intv,PetscInt nintv,PetscReal *qq,PetscInt ldq) | |
| 696 | { | ||
| 697 | 2134605 | PetscInt i,j; | |
| 698 | 2134605 | PetscReal c,h,h2,tmp,*sp,*sq,*sp2,*sq2; | |
| 699 | |||
| 700 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
2134605 | PetscFunctionBegin; |
| 701 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
12807630 | for (j=0;j<nintv;j++) { /* consider interval between intv(j) and intv(j+1) */ |
| 702 | 10673025 | sp = pp+j*ldp; | |
| 703 | 10673025 | sq = qq+j*ldq; | |
| 704 | 10673025 | sp2 = sp; | |
| 705 | 10673025 | sq2 = sq+1; | |
| 706 | 10673025 | c = 0.5*(intv[j] + intv[j+1]); /* compute c = (intv(j) + intv(j+1))/2 */ | |
| 707 | 10673025 | h = 0.5*(intv[j+1] - intv[j]); /* compute h = (intv(j+1) - intv(j))/2 and h2 = h/2 */ | |
| 708 | 10673025 | h2 = 0.5*h; | |
| 709 | 10673025 | i = deg1; | |
| 710 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
826715325 | while (i--) *sq++ = c*(*sp++); /* compute q(1:deg1,j) = c*p(1:deg1,j) */ |
| 711 | 10673025 | *sq++ = 0.0; /* set q(deg1+1,j) = 0.0 */ | |
| 712 | 10673025 | *(sq2++) += h*(*sp2++); /* compute q(2,j) = q(2,j) + h*p(1,j) */ | |
| 713 | 10673025 | i = deg1-1; | |
| 714 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
816042300 | while (i--) { /* compute q(3:deg1+1,j) += h2*p(2:deg1,j) and then q(1:deg1-1,j) += h2*p(2:deg1,j) */ |
| 715 | 805369275 | tmp = h2*(*sp2++); | |
| 716 | 805369275 | *(sq2-2) += tmp; | |
| 717 | 805369275 | *(sq2++) += tmp; | |
| 718 | } | ||
| 719 | } | ||
| 720 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
2134605 | PetscFunctionReturn(PETSC_SUCCESS); |
| 721 | } | ||
| 722 | |||
| 723 | /* //////////////////////////////////////////////////////////////////////////// | ||
| 724 | // Conjugate Residual Method in the Polynomial Space | ||
| 725 | //////////////////////////////////////////////////////////////////////////// */ | ||
| 726 | |||
| 727 | /* | ||
| 728 | B := alpha*A + beta*B | ||
| 729 | |||
| 730 | A,B are nxk | ||
| 731 | */ | ||
| 732 | 8494660 | static inline PetscErrorCode Mat_AXPY_BLAS(PetscInt n,PetscInt k,PetscReal alpha,const PetscReal *A,PetscInt lda,PetscReal beta,PetscReal *B,PetscInt ldb) | |
| 733 | { | ||
| 734 | 8494660 | PetscInt i,j; | |
| 735 | |||
| 736 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
8494660 | PetscFunctionBegin; |
| 737 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
8494660 | if (beta==(PetscReal)1.0) { |
| 738 |
4/4✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
|
1670879500 | for (j=0;j<k;j++) for (i=0;i<n;i++) B[i+j*ldb] += alpha*A[i+j*lda]; |
| 739 |
3/6✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
4257000 | PetscCall(PetscLogFlops(2.0*n*k)); |
| 740 | } else { | ||
| 741 |
4/4✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
|
1667789210 | for (j=0;j<k;j++) for (i=0;i<n;i++) B[i+j*ldb] = alpha*A[i+j*lda] + beta*B[i+j*ldb]; |
| 742 |
3/6✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
4237660 | PetscCall(PetscLogFlops(3.0*n*k)); |
| 743 | } | ||
| 744 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
8494660 | PetscFunctionReturn(PETSC_SUCCESS); |
| 745 | } | ||
| 746 | |||
| 747 | /* | ||
| 748 | FILTLAN function FilteredConjugateResidualPolynomial | ||
| 749 | |||
| 750 | ** Conjugate Residual Method in the Polynomial Space | ||
| 751 | |||
| 752 | this routine employs a conjugate-residual-type algorithm in polynomial space to minimize ||P(z)-Q(z)||_w, | ||
| 753 | where P(z), the base filter, is the input piecewise polynomial, and | ||
| 754 | Q(z) is the output polynomial satisfying Q(0)==1, i.e. the constant term of Q(z) is 1 | ||
| 755 | niter is the number of conjugate-residual iterations; therefore, the degree of Q(z) is up to niter+1 | ||
| 756 | both P(z) and Q(z) are expanded in the `translated' (scale-and-shift) Chebyshev basis for each interval, | ||
| 757 | and presented as matrices of Chebyshev coefficients, denoted by pp and qq, respectively | ||
| 758 | |||
| 759 | input: | ||
| 760 | intv is a vector which defines the intervals; the j-th interval is [intv(j),intv(j+1)) | ||
| 761 | w is a vector of Chebyshev weights; the weight of j-th interval is w(j) | ||
| 762 | the interval weights define the inner product of two continuous functions and then | ||
| 763 | the derived w-norm ||P(z)-Q(z)||_w | ||
| 764 | pp is a matrix of Chebyshev coefficients which defines the piecewise polynomial P(z) | ||
| 765 | to be specific, for z in [intv(j), intv(j+1)), P(z) equals | ||
| 766 | P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(niter+2,j)*S_{niter+1}(z), | ||
| 767 | where S_i(z) is the `translated' Chebyshev polynomial in that interval, | ||
| 768 | S_i((z-c)/h) = T_i(z), c = (intv(j)+intv(j+1))) / 2, h = (intv(j+1)-intv(j)) / 2, | ||
| 769 | with T_i(z) the Chebyshev polynomial of the first kind, | ||
| 770 | T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2 | ||
| 771 | |||
| 772 | output: | ||
| 773 | the return matrix, denoted by qq, represents a polynomial Q(z) with degree up to 1+niter | ||
| 774 | and satisfying Q(0)==1, such that ||P(z))-Q(z)||_w is minimized | ||
| 775 | this polynomial Q(z) is expanded in the `translated' Chebyshev basis for each interval | ||
| 776 | to be precise, considering z in [intv(j), intv(j+1)), Q(z) equals | ||
| 777 | Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(niter+2,j)*S_{niter+1}(z) | ||
| 778 | |||
| 779 | note: | ||
| 780 | 1. since Q(0)==1, P(0)==1 is expected; if P(0)!=1, one can translate P(z) | ||
| 781 | for example, if P(0)==0, one can use 1-P(z) as input instead of P(z) | ||
| 782 | 2. typically, the base filter, defined by pp and intv, is from Hermite interpolation | ||
| 783 | in intervals [intv(j),intv(j+1)) for j=1,...,nintv, with nintv the number of intervals | ||
| 784 | */ | ||
| 785 | 9670 | static PetscErrorCode FILTLAN_FilteredConjugateResidualPolynomial(PetscReal *cpol,PetscReal *baseFilter,PetscInt nbase,PetscReal *intv,PetscInt m,PetscReal *intervalWeights,PetscInt niter) | |
| 786 | { | ||
| 787 | 9670 | PetscInt i,j,srpol,scpol,sarpol,sppol,sappol,ld,nintv; | |
| 788 | 9670 | PetscReal rho,rho0,rho1,den,bet,alp,alp0,*ppol,*rpol,*appol,*arpol; | |
| 789 | |||
| 790 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
9670 | PetscFunctionBegin; |
| 791 | 9670 | nintv = m-1; | |
| 792 | 9670 | ld = niter+2; /* leading dimension */ | |
| 793 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
9670 | PetscCall(PetscCalloc4(ld*nintv,&ppol,ld*nintv,&rpol,ld*nintv,&appol,ld*nintv,&arpol)); |
| 794 |
4/6✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
9670 | PetscCall(PetscArrayzero(cpol,ld*nintv)); |
| 795 | /* initialize polynomial ppol to be 1 (i.e. multiplicative identity) in all intervals */ | ||
| 796 | 58020 | sppol = 2; | |
| 797 | 58020 | srpol = 2; | |
| 798 | 58020 | scpol = 2; | |
| 799 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
58020 | for (j=0;j<nintv;j++) { |
| 800 | 48350 | ppol[j*ld] = 1.0; | |
| 801 | 48350 | rpol[j*ld] = 1.0; | |
| 802 | 48350 | cpol[j*ld] = 1.0; | |
| 803 | } | ||
| 804 | /* ppol is the initial p-polynomial (corresponding to the A-conjugate vector p in CG) | ||
| 805 | rpol is the r-polynomial (corresponding to the residual vector r in CG) | ||
| 806 | cpol is the "corrected" residual polynomial (result of this function) */ | ||
| 807 | 9670 | sappol = 3; | |
| 808 | 9670 | sarpol = 3; | |
| 809 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
9670 | PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(ppol,sppol,ld,intv,nintv,appol,ld)); |
| 810 |
4/4✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
|
183730 | for (i=0;i<3;i++) for (j=0;j<nintv;j++) arpol[i+j*ld] = appol[i+j*ld]; |
| 811 | 9670 | rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights); | |
| 812 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
1339258 | for (i=0;i<niter;i++) { |
| 813 | 1337500 | den = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(appol,sappol,nintv,ld,appol,sappol,nintv,ld,intervalWeights); | |
| 814 | 1337500 | alp0 = rho/den; | |
| 815 | 1337500 | rho1 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(baseFilter,nbase,nintv,nbase,appol,sappol,nintv,ld,intervalWeights); | |
| 816 | 1337500 | alp = (rho-rho1)/den; | |
| 817 | 1337500 | srpol++; | |
| 818 | 1337500 | scpol++; | |
| 819 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
1337500 | PetscCall(Mat_AXPY_BLAS(srpol,nintv,-alp0,appol,ld,1.0,rpol,ld)); |
| 820 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
1337500 | PetscCall(Mat_AXPY_BLAS(scpol,nintv,-alp,appol,ld,1.0,cpol,ld)); |
| 821 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
1337500 | if (i+1 == niter) break; |
| 822 | 1327830 | sarpol++; | |
| 823 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
1327830 | PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(rpol,srpol,ld,intv,nintv,arpol,ld)); |
| 824 | 1327830 | rho0 = rho; | |
| 825 | 1327830 | rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights); | |
| 826 | 1327830 | bet = rho / rho0; | |
| 827 | 1327830 | sppol++; | |
| 828 | 1327830 | sappol++; | |
| 829 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
1327830 | PetscCall(Mat_AXPY_BLAS(sppol,nintv,1.0,rpol,ld,bet,ppol,ld)); |
| 830 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
1327830 | PetscCall(Mat_AXPY_BLAS(sappol,nintv,1.0,arpol,ld,bet,appol,ld)); |
| 831 | } | ||
| 832 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
9670 | PetscCall(PetscFree4(ppol,rpol,appol,arpol)); |
| 833 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
1758 | PetscFunctionReturn(PETSC_SUCCESS); |
| 834 | } | ||
| 835 | |||
| 836 | /* | ||
| 837 | FILTLAN function FilteredConjugateResidualMatrixPolynomialVectorProduct | ||
| 838 | |||
| 839 | this routine employs a conjugate-residual-type algorithm in polynomial space to compute | ||
| 840 | x = x0 + s(A)*r0 with r0 = b - A*x0, such that ||(1-P(z))-z*s(z)||_w is minimized, where | ||
| 841 | P(z) is a given piecewise polynomial, called the base filter, | ||
| 842 | s(z) is a polynomial of degree up to niter, the number of conjugate-residual iterations, | ||
| 843 | and b and x0 are given vectors | ||
| 844 | |||
| 845 | note that P(z) is expanded in the `translated' (scale-and-shift) Chebyshev basis for each interval, | ||
| 846 | and presented as a matrix of Chebyshev coefficients pp | ||
| 847 | |||
| 848 | input: | ||
| 849 | A is a sparse matrix | ||
| 850 | x0, b are vectors | ||
| 851 | niter is the number of conjugate-residual iterations | ||
| 852 | intv is a vector which defines the intervals; the j-th interval is [intv(j),intv(j+1)) | ||
| 853 | w is a vector of Chebyshev weights; the weight of j-th interval is w(j) | ||
| 854 | the interval weights define the inner product of two continuous functions and then | ||
| 855 | the derived w-norm ||P(z)-Q(z)||_w | ||
| 856 | pp is a matrix of Chebyshev coefficients which defines the piecewise polynomial P(z) | ||
| 857 | to be specific, for z in [intv(j), intv(j+1)), P(z) equals | ||
| 858 | P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(niter+2,j)*S_{niter+1}(z), | ||
| 859 | where S_i(z) is the `translated' Chebyshev polynomial in that interval, | ||
| 860 | S_i((z-c)/h) = T_i(z), c = (intv(j)+intv(j+1))) / 2, h = (intv(j+1)-intv(j)) / 2, | ||
| 861 | with T_i(z) the Chebyshev polynomial of the first kind, | ||
| 862 | T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2 | ||
| 863 | tol is the tolerance; if the residual polynomial in z-norm is dropped by a factor lower | ||
| 864 | than tol, then stop the conjugate-residual iteration | ||
| 865 | |||
| 866 | output: | ||
| 867 | the return vector is x = x0 + s(A)*r0 with r0 = b - A*x0, such that ||(1-P(z))-z*s(z)||_w is minimized, | ||
| 868 | subject to that s(z) is a polynomial of degree up to niter, where P(z) is the base filter | ||
| 869 | in short, z*s(z) approximates 1-P(z) | ||
| 870 | |||
| 871 | note: | ||
| 872 | 1. since z*s(z) approximates 1-P(z), P(0)==1 is expected; if P(0)!=1, one can translate P(z) | ||
| 873 | for example, if P(0)==0, one can use 1-P(z) as input instead of P(z) | ||
| 874 | 2. typically, the base filter, defined by pp and intv, is from Hermite interpolation | ||
| 875 | in intervals [intv(j),intv(j+1)) for j=1,...,nintv, with nintv the number of intervals | ||
| 876 | 3. a common application is to compute R(A)*b, where R(z) approximates 1-P(z) | ||
| 877 | in this case, one can set x0 = 0 and then the return vector is x = s(A)*b, where | ||
| 878 | z*s(z) approximates 1-P(z); therefore, A*x is the wanted R(A)*b | ||
| 879 | */ | ||
| 880 | 5748 | static PetscErrorCode FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProduct(Mat A,Vec b,Vec x,PetscReal *baseFilter,PetscInt nbase,PetscReal *intv,PetscInt nintv,PetscReal *intervalWeights,PetscInt niter,Vec *work) | |
| 881 | { | ||
| 882 | 5748 | PetscInt i,j,srpol,scpol,sarpol,sppol,sappol,ld; | |
| 883 | 5748 | PetscReal rho,rho0,rho00,rho1,den,bet,alp,alp0,*cpol,*ppol,*rpol,*appol,*arpol,tol=0.0; | |
| 884 | 5748 | Vec r=work[0],p=work[1],ap=work[2],w=work[3]; | |
| 885 | 5748 | PetscScalar alpha; | |
| 886 | |||
| 887 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
5748 | PetscFunctionBegin; |
| 888 | 5748 | ld = niter+3; /* leading dimension */ | |
| 889 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
5748 | PetscCall(PetscCalloc5(ld*nintv,&ppol,ld*nintv,&rpol,ld*nintv,&cpol,ld*nintv,&appol,ld*nintv,&arpol)); |
| 890 | /* initialize polynomial ppol to be 1 (i.e. multiplicative identity) in all intervals */ | ||
| 891 | 34488 | sppol = 2; | |
| 892 | 34488 | srpol = 2; | |
| 893 | 34488 | scpol = 2; | |
| 894 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
34488 | for (j=0;j<nintv;j++) { |
| 895 | 28740 | ppol[j*ld] = 1.0; | |
| 896 | 28740 | rpol[j*ld] = 1.0; | |
| 897 | 28740 | cpol[j*ld] = 1.0; | |
| 898 | } | ||
| 899 | /* ppol is the initial p-polynomial (corresponding to the A-conjugate vector p in CG) | ||
| 900 | rpol is the r-polynomial (corresponding to the residual vector r in CG) | ||
| 901 | cpol is the "corrected" residual polynomial */ | ||
| 902 | 5748 | sappol = 3; | |
| 903 | 5748 | sarpol = 3; | |
| 904 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
5748 | PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(ppol,sppol,ld,intv,nintv,appol,ld)); |
| 905 |
4/4✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
|
109212 | for (i=0;i<3;i++) for (j=0;j<nintv;j++) arpol[i+j*ld] = appol[i+j*ld]; |
| 906 | 5748 | rho00 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights); | |
| 907 | 5748 | rho = rho00; | |
| 908 | |||
| 909 | /* corrected CR in vector space */ | ||
| 910 | /* we assume x0 is always 0 */ | ||
| 911 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
5748 | PetscCall(VecSet(x,0.0)); |
| 912 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
5748 | PetscCall(VecCopy(b,r)); /* initial residual r = b-A*x0 */ |
| 913 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
5748 | PetscCall(VecCopy(r,p)); /* p = r */ |
| 914 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
5748 | PetscCall(MatMult(A,p,ap)); /* ap = A*p */ |
| 915 | |||
| 916 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
725348 | for (i=0;i<niter;i++) { |
| 917 | /* iteration in the polynomial space */ | ||
| 918 | 719600 | den = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(appol,sappol,nintv,ld,appol,sappol,nintv,ld,intervalWeights); | |
| 919 | 719600 | alp0 = rho/den; | |
| 920 | 719600 | rho1 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(baseFilter,nbase,nintv,nbase,appol,sappol,nintv,ld,intervalWeights); | |
| 921 | 719600 | alp = (rho-rho1)/den; | |
| 922 | 719600 | srpol++; | |
| 923 | 719600 | scpol++; | |
| 924 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
719600 | PetscCall(Mat_AXPY_BLAS(srpol,nintv,-alp0,appol,ld,1.0,rpol,ld)); |
| 925 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
719600 | PetscCall(Mat_AXPY_BLAS(scpol,nintv,-alp,appol,ld,1.0,cpol,ld)); |
| 926 | 719600 | sarpol++; | |
| 927 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
719600 | PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(rpol,srpol,ld,intv,nintv,arpol,ld)); |
| 928 | 719600 | rho0 = rho; | |
| 929 | 719600 | rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights); | |
| 930 | |||
| 931 | /* update x in the vector space */ | ||
| 932 | 719600 | alpha = alp; | |
| 933 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
719600 | PetscCall(VecAXPY(x,alpha,p)); /* x += alp*p */ |
| 934 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
719600 | if (rho < tol*rho00) break; |
| 935 | |||
| 936 | /* finish the iteration in the polynomial space */ | ||
| 937 | 719600 | bet = rho / rho0; | |
| 938 | 719600 | sppol++; | |
| 939 | 719600 | sappol++; | |
| 940 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
719600 | PetscCall(Mat_AXPY_BLAS(sppol,nintv,1.0,rpol,ld,bet,ppol,ld)); |
| 941 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
719600 | PetscCall(Mat_AXPY_BLAS(sappol,nintv,1.0,arpol,ld,bet,appol,ld)); |
| 942 | |||
| 943 | /* finish the iteration in the vector space */ | ||
| 944 | 719600 | alpha = -alp0; | |
| 945 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
719600 | PetscCall(VecAXPY(r,alpha,ap)); /* r -= alp0*ap */ |
| 946 | 719600 | alpha = bet; | |
| 947 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
719600 | PetscCall(VecAYPX(p,alpha,r)); /* p = r + bet*p */ |
| 948 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
719600 | PetscCall(MatMult(A,r,w)); /* ap = A*r + bet*ap */ |
| 949 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
719600 | PetscCall(VecAYPX(ap,alpha,w)); |
| 950 | } | ||
| 951 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
5748 | PetscCall(PetscFree5(ppol,rpol,cpol,appol,arpol)); |
| 952 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
1084 | PetscFunctionReturn(PETSC_SUCCESS); |
| 953 | } | ||
| 954 | |||
| 955 | /* | ||
| 956 | Gateway to FILTLAN for evaluating y=p(A)*x | ||
| 957 | */ | ||
| 958 | 5748 | static PetscErrorCode MatMult_FILTLAN(Mat A,Vec x,Vec y) | |
| 959 | { | ||
| 960 | 5748 | ST st; | |
| 961 | 5748 | ST_FILTER *ctx; | |
| 962 | 5748 | FILTLAN_CTX filtlan; | |
| 963 | 5748 | PetscInt npoints; | |
| 964 | |||
| 965 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
5748 | PetscFunctionBegin; |
| 966 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
5748 | PetscCall(MatShellGetContext(A,&st)); |
| 967 | 5748 | ctx = (ST_FILTER*)st->data; | |
| 968 | 5748 | filtlan = (FILTLAN_CTX)ctx->data; | |
| 969 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
|
5748 | npoints = (filtlan->filterInfo->filterType == 2)? 6: 4; |
| 970 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
5748 | PetscCall(FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProduct(ctx->T,x,y,filtlan->baseFilter,2*filtlan->baseDegree+2,filtlan->intervals,npoints-1,filtlan->opts->intervalWeights,ctx->polyDegree,st->work)); |
| 971 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
5748 | PetscCall(VecCopy(y,st->work[0])); |
| 972 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
5748 | PetscCall(MatMult(ctx->T,st->work[0],y)); |
| 973 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
1084 | PetscFunctionReturn(PETSC_SUCCESS); |
| 974 | } | ||
| 975 | |||
| 976 | /* Block version of FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProduct */ | ||
| 977 | 357 | static PetscErrorCode FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProductBlock(Mat A,Mat B,Mat C,PetscReal *baseFilter,PetscInt nbase,PetscReal *intv,PetscInt nintv,PetscReal *intervalWeights,PetscInt niter,Vec *work,Mat R,Mat P,Mat AP,Mat W) | |
| 978 | { | ||
| 979 | 357 | PetscInt i,j,srpol,scpol,sarpol,sppol,sappol,ld; | |
| 980 | 357 | PetscReal rho,rho0,rho00,rho1,den,bet,alp,alp0,*cpol,*ppol,*rpol,*appol,*arpol,tol=0.0; | |
| 981 | 357 | PetscScalar alpha; | |
| 982 | |||
| 983 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
357 | PetscFunctionBegin; |
| 984 | 357 | ld = niter+3; /* leading dimension */ | |
| 985 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
357 | PetscCall(PetscCalloc5(ld*nintv,&ppol,ld*nintv,&rpol,ld*nintv,&cpol,ld*nintv,&appol,ld*nintv,&arpol)); |
| 986 | /* initialize polynomial ppol to be 1 (i.e. multiplicative identity) in all intervals */ | ||
| 987 | 2142 | sppol = 2; | |
| 988 | 2142 | srpol = 2; | |
| 989 | 2142 | scpol = 2; | |
| 990 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
2142 | for (j=0;j<nintv;j++) { |
| 991 | 1785 | ppol[j*ld] = 1.0; | |
| 992 | 1785 | rpol[j*ld] = 1.0; | |
| 993 | 1785 | cpol[j*ld] = 1.0; | |
| 994 | } | ||
| 995 | /* ppol is the initial p-polynomial (corresponding to the A-conjugate vector p in CG) | ||
| 996 | rpol is the r-polynomial (corresponding to the residual vector r in CG) | ||
| 997 | cpol is the "corrected" residual polynomial */ | ||
| 998 | 357 | sappol = 3; | |
| 999 | 357 | sarpol = 3; | |
| 1000 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
357 | PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(ppol,sppol,ld,intv,nintv,appol,ld)); |
| 1001 |
4/4✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
|
6783 | for (i=0;i<3;i++) for (j=0;j<nintv;j++) arpol[i+j*ld] = appol[i+j*ld]; |
| 1002 | 357 | rho00 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights); | |
| 1003 | 357 | rho = rho00; | |
| 1004 | |||
| 1005 | /* corrected CR in vector space */ | ||
| 1006 | /* we assume x0 is always 0 */ | ||
| 1007 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
357 | PetscCall(MatZeroEntries(C)); |
| 1008 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
357 | PetscCall(MatCopy(B,R,SAME_NONZERO_PATTERN)); /* initial residual r = b-A*x0 */ |
| 1009 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
357 | PetscCall(MatCopy(R,P,SAME_NONZERO_PATTERN)); /* p = r */ |
| 1010 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
357 | PetscCall(MatMatMult(A,P,MAT_REUSE_MATRIX,PETSC_DEFAULT,&AP)); /* ap = A*p */ |
| 1011 | |||
| 1012 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
71757 | for (i=0;i<niter;i++) { |
| 1013 | /* iteration in the polynomial space */ | ||
| 1014 | 71400 | den = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(appol,sappol,nintv,ld,appol,sappol,nintv,ld,intervalWeights); | |
| 1015 | 71400 | alp0 = rho/den; | |
| 1016 | 71400 | rho1 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(baseFilter,nbase,nintv,nbase,appol,sappol,nintv,ld,intervalWeights); | |
| 1017 | 71400 | alp = (rho-rho1)/den; | |
| 1018 | 71400 | srpol++; | |
| 1019 | 71400 | scpol++; | |
| 1020 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
71400 | PetscCall(Mat_AXPY_BLAS(srpol,nintv,-alp0,appol,ld,1.0,rpol,ld)); |
| 1021 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
71400 | PetscCall(Mat_AXPY_BLAS(scpol,nintv,-alp,appol,ld,1.0,cpol,ld)); |
| 1022 | 71400 | sarpol++; | |
| 1023 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
71400 | PetscCall(FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(rpol,srpol,ld,intv,nintv,arpol,ld)); |
| 1024 | 71400 | rho0 = rho; | |
| 1025 | 71400 | rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights); | |
| 1026 | |||
| 1027 | /* update x in the vector space */ | ||
| 1028 | 71400 | alpha = alp; | |
| 1029 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
71400 | PetscCall(MatAXPY(C,alpha,P,SAME_NONZERO_PATTERN)); /* x += alp*p */ |
| 1030 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
71400 | if (rho < tol*rho00) break; |
| 1031 | |||
| 1032 | /* finish the iteration in the polynomial space */ | ||
| 1033 | 71400 | bet = rho / rho0; | |
| 1034 | 71400 | sppol++; | |
| 1035 | 71400 | sappol++; | |
| 1036 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
71400 | PetscCall(Mat_AXPY_BLAS(sppol,nintv,1.0,rpol,ld,bet,ppol,ld)); |
| 1037 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
71400 | PetscCall(Mat_AXPY_BLAS(sappol,nintv,1.0,arpol,ld,bet,appol,ld)); |
| 1038 | |||
| 1039 | /* finish the iteration in the vector space */ | ||
| 1040 | 71400 | alpha = -alp0; | |
| 1041 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
71400 | PetscCall(MatAXPY(R,alpha,AP,SAME_NONZERO_PATTERN)); /* r -= alp0*ap */ |
| 1042 | 71400 | alpha = bet; | |
| 1043 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
71400 | PetscCall(MatAYPX(P,alpha,R,SAME_NONZERO_PATTERN)); /* p = r + bet*p */ |
| 1044 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
71400 | PetscCall(MatMatMult(A,R,MAT_REUSE_MATRIX,PETSC_DEFAULT,&W)); /* ap = A*r + bet*ap */ |
| 1045 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
71400 | PetscCall(MatAYPX(AP,alpha,W,SAME_NONZERO_PATTERN)); |
| 1046 | } | ||
| 1047 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
357 | PetscCall(PetscFree5(ppol,rpol,cpol,appol,arpol)); |
| 1048 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
51 | PetscFunctionReturn(PETSC_SUCCESS); |
| 1049 | } | ||
| 1050 | |||
| 1051 | /* | ||
| 1052 | Gateway to FILTLAN for evaluating C=p(A)*B | ||
| 1053 | */ | ||
| 1054 | 357 | static PetscErrorCode MatMatMult_FILTLAN(Mat A,Mat B,Mat C,void *pctx) | |
| 1055 | { | ||
| 1056 | 357 | ST st; | |
| 1057 | 357 | ST_FILTER *ctx; | |
| 1058 | 357 | FILTLAN_CTX filtlan; | |
| 1059 | 357 | PetscInt i,m1,m2,npoints; | |
| 1060 | |||
| 1061 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
357 | PetscFunctionBegin; |
| 1062 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
357 | PetscCall(MatShellGetContext(A,&st)); |
| 1063 | 357 | ctx = (ST_FILTER*)st->data; | |
| 1064 | 357 | filtlan = (FILTLAN_CTX)ctx->data; | |
| 1065 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
|
357 | npoints = (filtlan->filterInfo->filterType == 2)? 6: 4; |
| 1066 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
357 | if (ctx->nW) { /* check if work matrices must be resized */ |
| 1067 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
343 | PetscCall(MatGetSize(B,NULL,&m1)); |
| 1068 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
343 | PetscCall(MatGetSize(ctx->W[0],NULL,&m2)); |
| 1069 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
343 | if (m1!=m2) { |
| 1070 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
42 | PetscCall(MatDestroyMatrices(ctx->nW,&ctx->W)); |
| 1071 | 42 | ctx->nW = 0; | |
| 1072 | } | ||
| 1073 | } | ||
| 1074 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
357 | if (!ctx->nW) { /* allocate work matrices */ |
| 1075 | 56 | ctx->nW = 4; | |
| 1076 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
56 | PetscCall(PetscMalloc1(ctx->nW,&ctx->W)); |
| 1077 |
7/8✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 8 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 2 times.
|
280 | for (i=0;i<ctx->nW;i++) PetscCall(MatDuplicate(B,MAT_DO_NOT_COPY_VALUES,&ctx->W[i])); |
| 1078 | } | ||
| 1079 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
357 | PetscCall(FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProductBlock(ctx->T,B,ctx->W[0],filtlan->baseFilter,2*filtlan->baseDegree+2,filtlan->intervals,npoints-1,filtlan->opts->intervalWeights,ctx->polyDegree,st->work,C,ctx->W[1],ctx->W[2],ctx->W[3])); |
| 1080 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
357 | PetscCall(MatMatMult(ctx->T,ctx->W[0],MAT_REUSE_MATRIX,PETSC_DEFAULT,&C)); |
| 1081 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
51 | PetscFunctionReturn(PETSC_SUCCESS); |
| 1082 | } | ||
| 1083 | |||
| 1084 | /* | ||
| 1085 | FILTLAN function PolynomialFilterInterface::setFilter | ||
| 1086 | |||
| 1087 | Creates the shifted (and scaled) matrix and the base filter P(z). | ||
| 1088 | M is a shell matrix whose MatMult() applies the filter. | ||
| 1089 | */ | ||
| 1090 | 88 | static PetscErrorCode STComputeOperator_Filter_FILTLAN(ST st,Mat *G) | |
| 1091 | { | ||
| 1092 | 88 | ST_FILTER *ctx = (ST_FILTER*)st->data; | |
| 1093 | 88 | FILTLAN_CTX filtlan = (FILTLAN_CTX)ctx->data; | |
| 1094 | 88 | PetscInt i,npoints,n,m,N,M; | |
| 1095 | 88 | PetscReal frame2[4]; | |
| 1096 | 88 | PetscScalar alpha; | |
| 1097 | 88 | const PetscInt HighLowFlags[5] = { 1, -1, 0, -1, 1 }; | |
| 1098 | |||
| 1099 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
88 | PetscFunctionBegin; |
| 1100 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
88 | PetscCall(STSetWorkVecs(st,4)); |
| 1101 | 88 | filtlan->frame[0] = ctx->left; | |
| 1102 | 88 | filtlan->frame[1] = ctx->inta; | |
| 1103 | 88 | filtlan->frame[2] = ctx->intb; | |
| 1104 | 88 | filtlan->frame[3] = ctx->right; | |
| 1105 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
|
88 | if (filtlan->frame[0] == filtlan->frame[1]) { /* low pass filter, convert it to high pass filter */ |
| 1106 | /* T = frame[3]*eye(n) - A */ | ||
| 1107 | ✗ | PetscCall(MatDestroy(&ctx->T)); | |
| 1108 | ✗ | PetscCall(MatDuplicate(st->A[0],MAT_COPY_VALUES,&ctx->T)); | |
| 1109 | ✗ | PetscCall(MatScale(ctx->T,-1.0)); | |
| 1110 | ✗ | alpha = filtlan->frame[3]; | |
| 1111 | ✗ | PetscCall(MatShift(ctx->T,alpha)); | |
| 1112 | ✗ | for (i=0;i<4;i++) frame2[i] = filtlan->frame[3] - filtlan->frame[3-i]; | |
| 1113 | } else { /* it can be a mid-pass filter or a high-pass filter */ | ||
| 1114 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
|
88 | if (filtlan->frame[0] == 0.0) { |
| 1115 | ✗ | PetscCall(PetscObjectReference((PetscObject)st->A[0])); | |
| 1116 | ✗ | PetscCall(MatDestroy(&ctx->T)); | |
| 1117 | ✗ | ctx->T = st->A[0]; | |
| 1118 | ✗ | for (i=0;i<4;i++) frame2[i] = filtlan->frame[i]; | |
| 1119 | } else { | ||
| 1120 | /* T = A - frame[0]*eye(n) */ | ||
| 1121 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
88 | PetscCall(MatDestroy(&ctx->T)); |
| 1122 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
88 | PetscCall(MatDuplicate(st->A[0],MAT_COPY_VALUES,&ctx->T)); |
| 1123 | 88 | alpha = -filtlan->frame[0]; | |
| 1124 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
88 | PetscCall(MatShift(ctx->T,alpha)); |
| 1125 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
440 | for (i=0;i<4;i++) frame2[i] = filtlan->frame[i] - filtlan->frame[0]; |
| 1126 | } | ||
| 1127 | } | ||
| 1128 | |||
| 1129 | /* no need to recompute filter if the parameters did not change */ | ||
| 1130 |
3/4✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 10 times.
|
88 | if (st->state==ST_STATE_INITIAL || ctx->filtch) { |
| 1131 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
78 | PetscCall(FILTLAN_GetIntervals(filtlan->intervals,frame2,ctx->polyDegree,filtlan->baseDegree,filtlan->opts,filtlan->filterInfo)); |
| 1132 | /* translate the intervals back */ | ||
| 1133 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
|
78 | if (filtlan->frame[0] == filtlan->frame[1]) { /* low pass filter, convert it to high pass filter */ |
| 1134 | ✗ | for (i=0;i<4;i++) filtlan->intervals2[i] = filtlan->frame[3] - filtlan->intervals[3-i]; | |
| 1135 | } else { /* it can be a mid-pass filter or a high-pass filter */ | ||
| 1136 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
|
78 | if (filtlan->frame[0] == 0.0) { |
| 1137 | ✗ | for (i=0;i<6;i++) filtlan->intervals2[i] = filtlan->intervals[i]; | |
| 1138 | } else { | ||
| 1139 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
546 | for (i=0;i<6;i++) filtlan->intervals2[i] = filtlan->intervals[i] + filtlan->frame[0]; |
| 1140 | } | ||
| 1141 | } | ||
| 1142 | |||
| 1143 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
|
78 | npoints = (filtlan->filterInfo->filterType == 2)? 6: 4; |
| 1144 |
5/8✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
|
78 | PetscCall(PetscFree(filtlan->baseFilter)); |
| 1145 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
78 | PetscCall(PetscMalloc1((2*filtlan->baseDegree+2)*(npoints-1),&filtlan->baseFilter)); |
| 1146 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
78 | PetscCall(FILTLAN_HermiteBaseFilterInChebyshevBasis(filtlan->baseFilter,filtlan->intervals,npoints,HighLowFlags,filtlan->baseDegree)); |
| 1147 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
78 | PetscCall(PetscInfo(st,"Computed value of yLimit = %g\n",(double)filtlan->filterInfo->yLimit)); |
| 1148 | } | ||
| 1149 | 88 | ctx->filtch = PETSC_FALSE; | |
| 1150 | |||
| 1151 | /* create shell matrix*/ | ||
| 1152 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
88 | if (!*G) { |
| 1153 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
78 | PetscCall(MatGetSize(ctx->T,&N,&M)); |
| 1154 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
78 | PetscCall(MatGetLocalSize(ctx->T,&n,&m)); |
| 1155 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
78 | PetscCall(MatCreateShell(PetscObjectComm((PetscObject)st),n,m,N,M,st,G)); |
| 1156 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
78 | PetscCall(MatShellSetOperation(*G,MATOP_MULT,(PetscErrorCodeFn*)MatMult_FILTLAN)); |
| 1157 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
78 | PetscCall(MatShellSetMatProductOperation(*G,MATPRODUCT_AB,NULL,MatMatMult_FILTLAN,NULL,MATDENSE,MATDENSE)); |
| 1158 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
78 | PetscCall(MatShellSetMatProductOperation(*G,MATPRODUCT_AB,NULL,MatMatMult_FILTLAN,NULL,MATDENSECUDA,MATDENSECUDA)); |
| 1159 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
78 | PetscCall(MatShellSetMatProductOperation(*G,MATPRODUCT_AB,NULL,MatMatMult_FILTLAN,NULL,MATDENSEHIP,MATDENSEHIP)); |
| 1160 | } | ||
| 1161 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
16 | PetscFunctionReturn(PETSC_SUCCESS); |
| 1162 | } | ||
| 1163 | |||
| 1164 | 287 | static PetscErrorCode STFilterGetThreshold_Filter_FILTLAN(ST st,PetscReal *gamma) | |
| 1165 | { | ||
| 1166 | 287 | ST_FILTER *ctx = (ST_FILTER*)st->data; | |
| 1167 | 287 | FILTLAN_CTX filtlan = (FILTLAN_CTX)ctx->data; | |
| 1168 | |||
| 1169 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
287 | PetscFunctionBegin; |
| 1170 | 287 | *gamma = filtlan->filterInfo->yLimit; | |
| 1171 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
287 | PetscFunctionReturn(PETSC_SUCCESS); |
| 1172 | } | ||
| 1173 | |||
| 1174 | 68 | static PetscErrorCode STDestroy_Filter_FILTLAN(ST st) | |
| 1175 | { | ||
| 1176 | 68 | ST_FILTER *ctx = (ST_FILTER*)st->data; | |
| 1177 | 68 | FILTLAN_CTX filtlan = (FILTLAN_CTX)ctx->data; | |
| 1178 | |||
| 1179 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
68 | PetscFunctionBegin; |
| 1180 |
5/8✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
|
68 | PetscCall(PetscFree(filtlan->opts)); |
| 1181 |
5/8✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
|
68 | PetscCall(PetscFree(filtlan->filterInfo)); |
| 1182 |
5/8✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
|
68 | PetscCall(PetscFree(filtlan->baseFilter)); |
| 1183 |
6/8✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
|
68 | PetscCall(PetscFree(filtlan)); |
| 1184 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
12 | PetscFunctionReturn(PETSC_SUCCESS); |
| 1185 | } | ||
| 1186 | |||
| 1187 | 68 | PetscErrorCode STCreate_Filter_FILTLAN(ST st) | |
| 1188 | { | ||
| 1189 | 68 | ST_FILTER *ctx = (ST_FILTER*)st->data; | |
| 1190 | 68 | FILTLAN_CTX filtlan; | |
| 1191 | 68 | FILTLAN_IOP iop; | |
| 1192 | 68 | FILTLAN_PFI pfi; | |
| 1193 | |||
| 1194 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
68 | PetscFunctionBegin; |
| 1195 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
68 | PetscCall(PetscNew(&filtlan)); |
| 1196 | 68 | ctx->data = (void*)filtlan; | |
| 1197 | 68 | filtlan->baseDegree = 10; | |
| 1198 | |||
| 1199 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
68 | PetscCall(PetscNew(&iop)); |
| 1200 | 68 | filtlan->opts = iop; | |
| 1201 | 68 | iop->intervalWeights[0] = 100.0; | |
| 1202 | 68 | iop->intervalWeights[1] = 1.0; | |
| 1203 | 68 | iop->intervalWeights[2] = 1.0; | |
| 1204 | 68 | iop->intervalWeights[3] = 1.0; | |
| 1205 | 68 | iop->intervalWeights[4] = 100.0; | |
| 1206 | 68 | iop->transIntervalRatio = 0.6; | |
| 1207 | 68 | iop->reverseInterval = PETSC_FALSE; | |
| 1208 | 68 | iop->initialPlateau = 0.1; | |
| 1209 | 68 | iop->plateauShrinkRate = 1.5; | |
| 1210 | 68 | iop->initialShiftStep = 0.01; | |
| 1211 | 68 | iop->shiftStepExpanRate = 1.5; | |
| 1212 | 68 | iop->maxInnerIter = 30; | |
| 1213 | 68 | iop->yLimitTol = 0.0001; | |
| 1214 | 68 | iop->maxOuterIter = 50; | |
| 1215 | 68 | iop->numGridPoints = 200; | |
| 1216 | 68 | iop->yBottomLine = 0.001; | |
| 1217 | 68 | iop->yRippleLimit = 0.8; | |
| 1218 | |||
| 1219 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
68 | PetscCall(PetscNew(&pfi)); |
| 1220 | 68 | filtlan->filterInfo = pfi; | |
| 1221 | |||
| 1222 | 68 | ctx->computeoperator = STComputeOperator_Filter_FILTLAN; | |
| 1223 | 68 | ctx->getthreshold = STFilterGetThreshold_Filter_FILTLAN; | |
| 1224 | 68 | ctx->destroy = STDestroy_Filter_FILTLAN; | |
| 1225 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
68 | PetscFunctionReturn(PETSC_SUCCESS); |
| 1226 | } | ||
| 1227 |