Line data Source code
1 : /*
2 : - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3 : SLEPc - Scalable Library for Eigenvalue Problem Computations
4 : Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
5 :
6 : This file is part of SLEPc.
7 : SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8 : - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9 : */
10 :
11 : #include <slepc/private/dsimpl.h>
12 : #include <slepcblaslapack.h>
13 :
14 : struct HRtr
15 : {
16 : PetscScalar *data;
17 : PetscInt m;
18 : PetscInt idx[2];
19 : PetscInt n[2];
20 : PetscScalar tau[2];
21 : PetscReal alpha;
22 : PetscReal cs;
23 : PetscReal sn;
24 : PetscInt type;
25 : };
26 :
27 : /*
28 : Generates a hyperbolic rotation
29 : if x1*x1 - x2*x2 != 0
30 : r = sqrt(|x1*x1 - x2*x2|)
31 : c = x1/r s = x2/r
32 :
33 : | c -s||x1| |d*r|
34 : |-s c||x2| = | 0 |
35 : where d = 1 for type==1 and -1 for type==2
36 : Returns the condition number of the reduction
37 : */
38 51981 : static PetscErrorCode HRGen(PetscReal x1,PetscReal x2,PetscInt *type,PetscReal *c,PetscReal *s,PetscReal *r,PetscReal *cond)
39 : {
40 51981 : PetscReal t,n2,xa,xb;
41 51981 : PetscInt type_;
42 :
43 51981 : PetscFunctionBegin;
44 51981 : if (x2==0.0) {
45 0 : *r = PetscAbsReal(x1); *c = (x1>=0.0)?1.0:-1.0; *s = 0.0;
46 0 : if (type) *type = 1;
47 0 : PetscFunctionReturn(PETSC_SUCCESS);
48 : }
49 51981 : if (PetscAbsReal(x1) == PetscAbsReal(x2)) {
50 : /* hyperbolic rotation doesn't exist */
51 0 : *c = *s = *r = 0.0;
52 0 : if (type) *type = 0;
53 0 : *cond = PETSC_MAX_REAL;
54 0 : PetscFunctionReturn(PETSC_SUCCESS);
55 : }
56 :
57 51981 : if (PetscAbsReal(x1)>PetscAbsReal(x2)) {
58 : xa = x1; xb = x2; type_ = 1;
59 : } else {
60 9198 : xa = x2; xb = x1; type_ = 2;
61 : }
62 51981 : t = xb/xa;
63 51981 : n2 = PetscAbsReal(1 - t*t);
64 51981 : *r = PetscSqrtReal(n2)*PetscAbsReal(xa);
65 51981 : *c = x1/(*r);
66 51981 : *s = x2/(*r);
67 51981 : if (type_ == 2) *r *= -1;
68 51981 : if (type) *type = type_;
69 51981 : if (cond) *cond = (PetscAbsReal(*c) + PetscAbsReal(*s))/PetscAbsReal(PetscAbsReal(*c) - PetscAbsReal(*s));
70 51981 : PetscFunctionReturn(PETSC_SUCCESS);
71 : }
72 :
73 : /*
74 : |c s|
75 : Applies an hyperbolic rotator |s c|
76 : |c s|
77 : [x1 x2]|s c|
78 : */
79 125060 : static PetscErrorCode HRApply(PetscInt n,PetscScalar *x1,PetscInt inc1,PetscScalar *x2,PetscInt inc2,PetscReal c,PetscReal s)
80 : {
81 125060 : PetscInt i;
82 125060 : PetscReal t;
83 125060 : PetscScalar tmp;
84 :
85 125060 : PetscFunctionBegin;
86 125060 : if (PetscAbsReal(c)>PetscAbsReal(s)) { /* Type I */
87 106831 : t = s/c;
88 2675825 : for (i=0;i<n;i++) {
89 2568994 : x1[i*inc1] = c*x1[i*inc1] + s*x2[i*inc2];
90 2568994 : x2[i*inc2] = t*x1[i*inc1] + x2[i*inc2]/c;
91 : }
92 : } else { /* Type II */
93 18229 : t = c/s;
94 358493 : for (i=0;i<n;i++) {
95 340264 : tmp = x1[i*inc1];
96 340264 : x1[i*inc1] = c*x1[i*inc1] + s*x2[i*inc2];
97 340264 : x2[i*inc2] = t*x1[i*inc1] + tmp/s;
98 : }
99 : }
100 125060 : PetscFunctionReturn(PETSC_SUCCESS);
101 : }
102 :
103 : /*
104 : Reduction to tridiagonal-diagonal form (see F. Tisseur, SIMAX 26(1), 2004).
105 :
106 : Input:
107 : A symmetric (only lower triangular part is referred)
108 : s vector +1 and -1 (signature matrix)
109 : Output:
110 : d,e
111 : s
112 : Q s-orthogonal matrix with Q^T*A*Q = T (symmetric tridiagonal matrix)
113 : */
114 2703 : static PetscErrorCode TridiagDiag_HHR(PetscInt n,PetscScalar *A,PetscInt lda,PetscReal *s,PetscScalar* Q,PetscInt ldq,PetscBool flip,PetscReal *d,PetscReal *e,PetscInt *perm_,PetscScalar *work,PetscReal *rwork,PetscBLASInt *iwork)
115 : {
116 2703 : PetscInt i,j,k,*ii,*jj,i0=0,ik=0,type;
117 2703 : PetscInt nwu=0;
118 2703 : PetscReal *ss,cond=1.0,cs,sn,r;
119 2703 : PetscScalar tau,t,*AA;
120 2703 : PetscBLASInt n0,n1,ni,inc=1,m,n_,lda_,ldq_,*perm,tmp;
121 2703 : PetscBool breakdown = PETSC_TRUE;
122 :
123 2703 : PetscFunctionBegin;
124 2703 : if (n<3) {
125 0 : if (n==1) Q[0]=1;
126 0 : if (n==2) {
127 0 : Q[0] = Q[1+ldq] = 1;
128 0 : Q[1] = Q[ldq] = 0;
129 : }
130 0 : PetscFunctionReturn(PETSC_SUCCESS);
131 : }
132 2703 : PetscCall(PetscBLASIntCast(lda,&lda_));
133 2703 : PetscCall(PetscBLASIntCast(n,&n_));
134 2703 : PetscCall(PetscBLASIntCast(ldq,&ldq_));
135 2703 : ss = rwork;
136 2703 : perm = iwork;
137 2703 : AA = work;
138 47042 : for (i=0;i<n;i++) PetscCall(PetscArraycpy(AA+i*n,A+i*lda,n));
139 2703 : nwu += n*n;
140 2703 : k=0;
141 5406 : while (breakdown && k<n) {
142 2703 : breakdown = PETSC_FALSE;
143 : /* Classify (and flip) A and s according to sign */
144 2703 : if (flip) {
145 46998 : for (i=0;i<n;i++) {
146 44299 : PetscCall(PetscBLASIntCast(n-1-perm_[i],&perm[i]));
147 44299 : if (perm[i]==0) i0 = i;
148 44299 : if (perm[i]==k) ik = i;
149 : }
150 : } else {
151 44 : for (i=0;i<n;i++) {
152 40 : PetscCall(PetscBLASIntCast(perm_[i],&perm[i]));
153 40 : if (perm[i]==0) i0 = i;
154 40 : if (perm[i]==k) ik = i;
155 : }
156 : }
157 2703 : perm[ik] = 0;
158 2703 : PetscCall(PetscBLASIntCast(k,&perm[i0]));
159 2703 : i=1;
160 40522 : while (i<n-1 && s[perm[i-1]]==s[perm[0]]) {
161 37819 : if (s[perm[i]]!=s[perm[0]]) {
162 34503 : j=i+1;
163 36098 : while (j<n-1 && s[perm[j]]!=s[perm[0]])j++;
164 34503 : tmp = perm[i]; perm[i] = perm[j]; perm[j] = tmp;
165 : }
166 37819 : i++;
167 : }
168 47042 : for (i=0;i<n;i++) {
169 44339 : ss[i] = s[perm[i]];
170 : }
171 2703 : if (flip) {
172 : ii = &j;
173 : jj = &i;
174 : } else {
175 4 : ii = &i;
176 4 : jj = &j;
177 : }
178 47042 : for (i=0;i<n;i++)
179 830072 : for (j=0;j<n;j++)
180 785733 : A[i+j*lda] = AA[perm[*ii]+perm[*jj]*n];
181 : /* Initialize Q */
182 47042 : for (i=0;i<n;i++) {
183 44339 : PetscCall(PetscArrayzero(Q+i*ldq,n));
184 44339 : Q[perm[i]+i*ldq] = 1.0;
185 : }
186 40584 : for (ni=1;ni<n && ss[ni]==ss[0]; ni++);
187 2703 : n0 = ni-1;
188 2703 : n1 = n_-ni;
189 41636 : for (j=0;j<n-2;j++) {
190 38933 : PetscCall(PetscBLASIntCast(n-j-1,&m));
191 : /* Forming and applying reflectors */
192 38933 : if (n0 > 1) {
193 37511 : PetscCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n0,A+ni-n0+j*lda,A+ni-n0+j*lda+1,&inc,&tau));
194 : /* Apply reflector */
195 37511 : if (PetscAbsScalar(tau) != 0.0) {
196 37511 : t=*(A+ni-n0+j*lda); *(A+ni-n0+j*lda)=1.0;
197 37511 : PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&m,&n0,A+ni-n0+j*lda,&inc,&tau,A+j+1+(j+1)*lda,&lda_,work+nwu));
198 37511 : PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0,&m,A+ni-n0+j*lda,&inc,&tau,A+j+1+(j+1)*lda,&lda_,work+nwu));
199 : /* Update Q */
200 37511 : PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n0,A+ni-n0+j*lda,&inc,&tau,Q+(j+1)*ldq,&ldq_,work+nwu));
201 37511 : *(A+ni-n0+j*lda) = t;
202 334956 : for (i=1;i<n0;i++) {
203 297445 : *(A+ni-n0+j*lda+i) = 0.0; *(A+j+(ni-n0+i)*lda) = 0.0;
204 : }
205 37511 : *(A+j+(ni-n0)*lda) = *(A+ni-n0+j*lda);
206 : }
207 : }
208 38933 : if (n1 > 1) {
209 1312 : PetscCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n1,A+n-n1+j*lda,A+n-n1+j*lda+1,&inc,&tau));
210 : /* Apply reflector */
211 1312 : if (PetscAbsScalar(tau) != 0.0) {
212 1312 : t=*(A+n-n1+j*lda); *(A+n-n1+j*lda)=1.0;
213 1312 : PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&m,&n1,A+n-n1+j*lda,&inc,&tau,A+j+1+(n-n1)*lda,&lda_,work+nwu));
214 1312 : PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n1,&m,A+n-n1+j*lda,&inc,&tau,A+n-n1+(j+1)*lda,&lda_,work+nwu));
215 : /* Update Q */
216 1312 : PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n1,A+n-n1+j*lda,&inc,&tau,Q+(n-n1)*ldq,&ldq_,work+nwu));
217 1312 : *(A+n-n1+j*lda) = t;
218 9492 : for (i=1;i<n1;i++) {
219 8180 : *(A+n-n1+i+j*lda) = 0.0; *(A+j+(n-n1+i)*lda) = 0.0;
220 : }
221 1312 : *(A+j+(n-n1)*lda) = *(A+n-n1+j*lda);
222 : }
223 : }
224 : /* Hyperbolic rotation */
225 38933 : if (n0 > 0 && n1 > 0) {
226 23436 : PetscCall(HRGen(PetscRealPart(A[ni-n0+j*lda]),PetscRealPart(A[n-n1+j*lda]),&type,&cs,&sn,&r,&cond));
227 : /* Check condition number */
228 23436 : if (cond > 1.0/(10*PETSC_SQRT_MACHINE_EPSILON)) {
229 0 : breakdown = PETSC_TRUE;
230 0 : k++;
231 0 : PetscCheck(k<n && !flip,PETSC_COMM_SELF,PETSC_ERR_SUP,"Breakdown in construction of hyperbolic transformation");
232 : break;
233 : }
234 23436 : A[ni-n0+j*lda] = r; A[n-n1+j*lda] = 0.0;
235 23436 : A[j+(ni-n0)*lda] = r; A[j+(n-n1)*lda] = 0.0;
236 : /* Apply to A */
237 23436 : PetscCall(HRApply(m,A+j+1+(ni-n0)*lda,1,A+j+1+(n-n1)*lda,1,cs,-sn));
238 23436 : PetscCall(HRApply(m,A+ni-n0+(j+1)*lda,lda,A+n-n1+(j+1)*lda,lda,cs,-sn));
239 :
240 : /* Update Q */
241 23436 : PetscCall(HRApply(n,Q+(ni-n0)*ldq,1,Q+(n-n1)*ldq,1,cs,-sn));
242 23436 : if (type==2) {
243 2384 : ss[ni-n0] = -ss[ni-n0]; ss[n-n1] = -ss[n-n1];
244 2384 : n0++;ni++;n1--;
245 : }
246 : }
247 38933 : if (n0>0) n0--;
248 1166 : else n1--;
249 : }
250 : }
251 :
252 : /* flip matrices */
253 2703 : if (flip) {
254 44299 : for (i=0;i<n-1;i++) {
255 41600 : d[i] = PetscRealPart(A[n-i-1+(n-i-1)*lda]);
256 41600 : e[i] = PetscRealPart(A[n-i-1+(n-i-2)*lda]);
257 41600 : s[i] = ss[n-i-1];
258 : }
259 2699 : s[n-1] = ss[0];
260 2699 : d[n-1] = PetscRealPart(A[0]);
261 46998 : for (i=0;i<n;i++) PetscCall(PetscArraycpy(work+i*n,Q+i*ldq,n));
262 46998 : for (i=0;i<n;i++)
263 829632 : for (j=0;j<n;j++)
264 785333 : Q[i+j*ldq] = work[i+(n-j-1)*n];
265 : } else {
266 40 : for (i=0;i<n-1;i++) {
267 36 : d[i] = PetscRealPart(A[i+i*lda]);
268 36 : e[i] = PetscRealPart(A[i+1+i*lda]);
269 36 : s[i] = ss[i];
270 : }
271 4 : s[n-1] = ss[n-1];
272 4 : d[n-1] = PetscRealPart(A[n-1 + (n-1)*lda]);
273 : }
274 2703 : PetscFunctionReturn(PETSC_SUCCESS);
275 : }
276 :
277 86179 : static PetscErrorCode MadeHRtr(PetscInt sz,PetscInt n,PetscInt idx0,PetscInt n0,PetscInt idx1,PetscInt n1,struct HRtr *tr1,struct HRtr *tr2,PetscReal *ncond,PetscScalar *work)
278 : {
279 86179 : PetscScalar *x,*y;
280 86179 : PetscReal ncond2=1.0;
281 86179 : PetscBLASInt n0_,n1_,inc=1;
282 :
283 86179 : PetscFunctionBegin;
284 : /* Hyperbolic transformation to make zeros in x */
285 86179 : x = tr1->data;
286 86179 : tr1->n[0] = n0;
287 86179 : tr1->n[1] = n1;
288 86179 : tr1->idx[0] = idx0;
289 86179 : tr1->idx[1] = idx1;
290 86179 : PetscCall(PetscBLASIntCast(tr1->n[0],&n0_));
291 86179 : PetscCall(PetscBLASIntCast(tr1->n[1],&n1_));
292 86179 : if (tr1->n[0] > 1) PetscCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n0_,x+tr1->idx[0],x+tr1->idx[0]+1,&inc,tr1->tau));
293 86179 : if (tr1->n[1]> 1) PetscCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n1_,x+tr1->idx[1],x+tr1->idx[1]+1,&inc,tr1->tau+1));
294 86179 : if (tr1->idx[0]<tr1->idx[1]) PetscCall(HRGen(PetscRealPart(x[tr1->idx[0]]),PetscRealPart(x[tr1->idx[1]]),&tr1->type,&tr1->cs,&tr1->sn,&tr1->alpha,ncond));
295 : else {
296 60504 : tr1->alpha = PetscRealPart(x[tr1->idx[0]]);
297 60504 : *ncond = 1.0;
298 : }
299 86179 : if (sz==2) {
300 5322 : y = tr2->data;
301 : /* Apply first transformation to second column */
302 5322 : if (tr1->n[0] > 1 && PetscAbsScalar(tr1->tau[0])!=0.0) {
303 5094 : x[tr1->idx[0]] = 1.0;
304 5094 : PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0_,&inc,x+tr1->idx[0],&inc,tr1->tau,y+tr1->idx[0],&n0_,work));
305 : }
306 5322 : if (tr1->n[1] > 1 && PetscAbsScalar(tr1->tau[1])!=0.0) {
307 646 : x[tr1->idx[1]] = 1.0;
308 646 : PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n1_,&inc,x+tr1->idx[1],&inc,tr1->tau+1,y+tr1->idx[1],&n1_,work));
309 : }
310 5322 : if (tr1->idx[0]<tr1->idx[1]) PetscCall(HRApply(1,y+tr1->idx[0],1,y+tr1->idx[1],1,tr1->cs,-tr1->sn));
311 5322 : tr2->n[0] = tr1->n[0];
312 5322 : tr2->n[1] = tr1->n[1];
313 5322 : tr2->idx[0] = tr1->idx[0];
314 5322 : tr2->idx[1] = tr1->idx[1];
315 5322 : if (tr1->idx[0]<tr1->idx[1] && tr1->type==2) {
316 2661 : tr2->idx[1]++; tr2->n[1]--; tr2->n[0]++;
317 : }
318 5322 : if (tr2->n[0]>0) {
319 5322 : tr2->n[0]--; tr2->idx[0]++;
320 5322 : if (tr2->n[1]==0) tr2->idx[1] = tr2->idx[0];
321 : } else {
322 0 : tr2->n[1]--; tr2->idx[1]++; tr2->idx[0] = tr2->idx[1];
323 : }
324 : /* Hyperbolic transformation to make zeros in y */
325 5322 : PetscCall(PetscBLASIntCast(tr2->n[0],&n0_));
326 5322 : PetscCall(PetscBLASIntCast(tr2->n[1],&n1_));
327 5322 : if (tr2->n[0] > 1) PetscCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n0_,y+tr2->idx[0],y+tr2->idx[0]+1,&inc,tr2->tau));
328 5322 : if (tr2->n[1]> 1) PetscCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n1_,y+tr2->idx[1],y+tr2->idx[1]+1,&inc,tr2->tau+1));
329 5322 : if (tr2->idx[0]<tr2->idx[1]) PetscCall(HRGen(PetscRealPart(y[tr2->idx[0]]),PetscRealPart(y[tr2->idx[1]]),&tr2->type,&tr2->cs,&tr2->sn,&tr2->alpha,&ncond2));
330 : else {
331 2452 : tr2->alpha = PetscRealPart(y[tr2->idx[0]]);
332 2452 : ncond2 = 1.0;
333 : }
334 5322 : if (ncond2>*ncond) *ncond = ncond2;
335 : }
336 86179 : PetscFunctionReturn(PETSC_SUCCESS);
337 : }
338 :
339 : /*
340 : Auxiliary function to try perform one iteration of hr routine,
341 : checking condition number. If it is < tolD, apply the
342 : transformation to H and R, if not, ok=false and it do nothing
343 : tolE, tolerance to exchange complex pairs to improve conditioning
344 : */
345 83518 : static PetscErrorCode TryHRIt(PetscInt n,PetscInt j,PetscInt sz,PetscScalar *H,PetscInt ldh,PetscScalar *R,PetscInt ldr,PetscReal *s,PetscBool *exg,PetscBool *ok,PetscInt *n0,PetscInt *n1,PetscInt *idx0,PetscInt *idx1,PetscReal *cond,PetscScalar *work)
346 : {
347 83518 : struct HRtr *tr1,*tr2,tr1_t,tr2_t,tr1_te,tr2_te;
348 83518 : PetscScalar *x,*y;
349 83518 : PetscReal ncond=0,ncond_e;
350 83518 : PetscInt nwu=0,i,d=1;
351 83518 : PetscBLASInt n0_,n1_,inc=1,mh,mr,n_,ldr_,ldh_;
352 83518 : PetscReal tolD = 1e+5;
353 :
354 83518 : PetscFunctionBegin;
355 83518 : if (cond) *cond = 1.0;
356 83518 : PetscCall(PetscBLASIntCast(n,&n_));
357 83518 : PetscCall(PetscBLASIntCast(ldr,&ldr_));
358 83518 : PetscCall(PetscBLASIntCast(ldh,&ldh_));
359 83518 : x = work+nwu;
360 83518 : nwu += n;
361 83518 : PetscCall(PetscArraycpy(x,R+j*ldr,n));
362 83518 : *exg = PETSC_FALSE;
363 83518 : *ok = PETSC_TRUE;
364 83518 : tr1_t.data = x;
365 83518 : if (sz==1) {
366 : /* Hyperbolic transformation to make zeros in x */
367 80857 : PetscCall(MadeHRtr(sz,n,*idx0,*n0,*idx1,*n1,&tr1_t,NULL,&ncond,work+nwu));
368 : /* Check condition number to single column*/
369 80857 : if (ncond>tolD) *ok = PETSC_FALSE;
370 : tr1 = &tr1_t;
371 : tr2 = &tr2_t;
372 : } else {
373 2661 : y = work+nwu;
374 2661 : nwu += n;
375 2661 : PetscCall(PetscArraycpy(y,R+(j+1)*ldr,n));
376 2661 : tr2_t.data = y;
377 2661 : PetscCall(MadeHRtr(sz,n,*idx0,*n0,*idx1,*n1,&tr1_t,&tr2_t,&ncond,work+nwu));
378 : /* Computing hyperbolic transformations also for exchanged vectors */
379 2661 : tr1_te.data = work+nwu;
380 2661 : nwu += n;
381 2661 : PetscCall(PetscArraycpy(tr1_te.data,R+(j+1)*ldr,n));
382 2661 : tr2_te.data = work+nwu;
383 2661 : nwu += n;
384 2661 : PetscCall(PetscArraycpy(tr2_te.data,R+j*ldr,n));
385 2661 : PetscCall(MadeHRtr(sz,n,*idx0,*n0,*idx1,*n1,&tr1_te,&tr2_te,&ncond_e,work+nwu));
386 2661 : if (ncond > d*ncond_e) {
387 1566 : *exg = PETSC_TRUE;
388 1566 : tr1 = &tr1_te;
389 1566 : tr2 = &tr2_te;
390 1566 : ncond = ncond_e;
391 : } else {
392 : tr1 = &tr1_t;
393 : tr2 = &tr2_t;
394 : }
395 2661 : if (ncond>tolD) *ok = PETSC_FALSE;
396 : }
397 83518 : if (*ok) {
398 : /* Everything is OK, apply transformations to R and H */
399 : /* First column */
400 83518 : if (cond && *cond<ncond) *cond = ncond;
401 83518 : x = tr1->data;
402 83518 : PetscCall(PetscBLASIntCast(tr1->n[0],&n0_));
403 83518 : PetscCall(PetscBLASIntCast(tr1->n[1],&n1_));
404 83518 : PetscCall(PetscBLASIntCast(n-j-sz,&mr));
405 83518 : if (tr1->n[0] > 1 && PetscAbsScalar(tr1->tau[0])!=0.0) {
406 77270 : x[tr1->idx[0]] = 1.0;
407 77270 : PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0_,&mr,x+tr1->idx[0],&inc,tr1->tau,R+(j+sz)*ldr+tr1->idx[0],&ldr_,work+nwu));
408 77270 : PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n0_,x+tr1->idx[0],&inc,tr1->tau,H+(tr1->idx[0])*ldh,&ldh_,work+nwu));
409 : }
410 83518 : if (tr1->n[1] > 1 && PetscAbsScalar(tr1->tau[1])!=0.0) {
411 8175 : x[tr1->idx[1]] = 1.0;
412 8175 : PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n1_,&mr,x+tr1->idx[1],&inc,tr1->tau+1,R+(j+sz)*ldr+tr1->idx[1],&ldr_,work+nwu));
413 8175 : PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n1_,x+tr1->idx[1],&inc,tr1->tau+1,H+(tr1->idx[1])*ldh,&ldh_,work+nwu));
414 : }
415 83518 : if (tr1->idx[0]<tr1->idx[1]) {
416 23014 : PetscCall(HRApply(mr,R+(j+sz)*ldr+tr1->idx[0],ldr,R+(j+sz)*ldr+tr1->idx[1],ldr,tr1->cs,-tr1->sn));
417 23014 : if (tr1->type==1) PetscCall(HRApply(n,H+(tr1->idx[0])*ldh,1,H+(tr1->idx[1])*ldh,1,tr1->cs,tr1->sn));
418 : else {
419 2645 : PetscCall(HRApply(n,H+(tr1->idx[0])*ldh,1,H+(tr1->idx[1])*ldh,1,-tr1->cs,-tr1->sn));
420 2645 : s[tr1->idx[0]] = -s[tr1->idx[0]];
421 2645 : s[tr1->idx[1]] = -s[tr1->idx[1]];
422 : }
423 : }
424 1560810 : for (i=0;i<tr1->idx[0];i++) *(R+j*ldr+i) = x[i];
425 1524619 : for (i=tr1->idx[0]+1;i<n;i++) *(R+j*ldr+i) = 0.0;
426 83518 : *(R+j*ldr+tr1->idx[0]) = tr1->alpha;
427 83518 : if (sz==2) {
428 2661 : y = tr2->data;
429 : /* Second column */
430 2661 : PetscCall(PetscBLASIntCast(tr2->n[0],&n0_));
431 2661 : PetscCall(PetscBLASIntCast(tr2->n[1],&n1_));
432 2661 : PetscCall(PetscBLASIntCast(n-j-sz,&mr));
433 2661 : PetscCall(PetscBLASIntCast(n-tr2->idx[0],&mh));
434 2661 : if (tr2->n[0] > 1 && PetscAbsScalar(tr2->tau[0])!=0.0) {
435 2530 : y[tr2->idx[0]] = 1.0;
436 2530 : PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0_,&mr,y+tr2->idx[0],&inc,tr2->tau,R+(j+2)*ldr+tr2->idx[0],&ldr_,work+nwu));
437 2530 : PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n0_,y+tr2->idx[0],&inc,tr2->tau,H+(tr2->idx[0])*ldh,&ldh_,work+nwu));
438 : }
439 2661 : if (tr2->n[1] > 1 && PetscAbsScalar(tr2->tau[1])!=0.0) {
440 301 : y[tr2->idx[1]] = 1.0;
441 301 : PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n1_,&mr,y+tr2->idx[1],&inc,tr2->tau+1,R+(j+2)*ldr+tr2->idx[1],&ldr_,work+nwu));
442 301 : PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n1_,y+tr2->idx[1],&inc,tr2->tau+1,H+(tr2->idx[1])*ldh,&ldh_,work+nwu));
443 : }
444 2661 : if (tr2->idx[0]<tr2->idx[1]) {
445 1701 : PetscCall(HRApply(mr,R+(j+2)*ldr+tr2->idx[0],ldr,R+(j+2)*ldr+tr2->idx[1],ldr,tr2->cs,-tr2->sn));
446 1701 : if (tr2->type==1) PetscCall(HRApply(n,H+(tr2->idx[0])*ldh,1,H+(tr2->idx[1])*ldh,1,tr2->cs,tr2->sn));
447 : else {
448 1563 : PetscCall(HRApply(n,H+(tr2->idx[0])*ldh,1,H+(tr2->idx[1])*ldh,1,-tr2->cs,-tr2->sn));
449 1563 : s[tr2->idx[0]] = -s[tr2->idx[0]];
450 1563 : s[tr2->idx[1]] = -s[tr2->idx[1]];
451 : }
452 : }
453 28187 : for (i=0;i<tr2->idx[0]-1;i++) *(R+(j+1)*ldr+i) = y[i];
454 2661 : *(R+(j+1)*ldr+tr2->idx[0]-1) = y[tr2->idx[0]-1];
455 67039 : for (i=tr2->idx[0]+1;i<n;i++) *(R+(j+1)*ldr+i) = 0.0;
456 2661 : *(R+(j+1)*ldr+tr2->idx[0]) = tr2->alpha;
457 2661 : *n0 = tr2->n[0];
458 2661 : *n1 = tr2->n[1];
459 2661 : *idx0 = tr2->idx[0];
460 2661 : *idx1 = tr2->idx[1];
461 2661 : if (tr2->idx[0]<tr2->idx[1] && tr2->type==2) {
462 1563 : (*idx1)++; (*n1)--; (*n0)++;
463 : }
464 : } else {
465 80857 : *n0 = tr1->n[0];
466 80857 : *n1 = tr1->n[1];
467 80857 : *idx0 = tr1->idx[0];
468 80857 : *idx1 = tr1->idx[1];
469 80857 : if (tr1->idx[0]<tr1->idx[1] && tr1->type==2) {
470 1606 : (*idx1)++; (*n1)--; (*n0)++;
471 : }
472 : }
473 83518 : if (*n0>0) {
474 80617 : (*n0)--; (*idx0)++;
475 80617 : if (*n1==0) *idx1 = *idx0;
476 : } else {
477 2901 : (*n1)--; (*idx1)++; *idx0 = *idx1;
478 : }
479 : }
480 83518 : PetscFunctionReturn(PETSC_SUCCESS);
481 : }
482 :
483 : /*
484 : compute V = HR whit H s-orthogonal and R upper triangular
485 : (work: work space of minimum size 6*nv)
486 : */
487 2755 : PetscErrorCode DSPseudoOrthog_HR(PetscInt *nv,PetscScalar *V,PetscInt ldv,PetscReal *s,PetscScalar *R,PetscInt ldr,PetscBLASInt *perm,PetscBLASInt *cmplxEig,PetscBool *breakdown,PetscScalar *work)
488 : {
489 2755 : PetscInt i,j,n,n0,n1,np,idx0,idx1,sz=1,k=0,nwu=0;
490 2755 : PetscBLASInt t1,t2;
491 2755 : PetscScalar *col1,*col2;
492 2755 : PetscBool exg=PETSC_FALSE,ok=PETSC_FALSE;
493 :
494 2755 : PetscFunctionBegin;
495 2755 : n = *nv;
496 2755 : col1 = work+nwu;
497 2755 : nwu += n;
498 2755 : col2 = work+nwu;
499 2755 : nwu += n;
500 : /* Sort R and s according to sing(s) */
501 2755 : np = 0;
502 88934 : for (i=0;i<n;i++) if (s[i]>0) np++;
503 2755 : if (s[0]>0) n1 = np;
504 159 : else n1 = n-np;
505 2755 : n0 = 0;
506 88934 : for (i=0;i<n;i++) {
507 86179 : if (s[i]==s[0]) {
508 79070 : s[n0] = s[0];
509 79070 : PetscCall(PetscBLASIntCast(i,&perm[n0++]));
510 86179 : } else PetscCall(PetscBLASIntCast(i,&perm[n1++]));
511 : }
512 9864 : for (i=n0;i<n;i++) s[i] = -s[0];
513 2755 : n1 -= n0;
514 2755 : idx0 = 0;
515 2755 : idx1 = n0;
516 2755 : if (idx1==n) idx1=idx0;
517 88934 : for (i=0;i<n;i++) {
518 3183316 : for (j=0;j<n;j++) R[j*ldr+i] = V[j*ldv+perm[i]];
519 : }
520 : /* Initialize H */
521 88934 : for (i=0;i<n;i++) {
522 86179 : PetscCall(PetscArrayzero(V+i*ldv,n));
523 86179 : V[perm[i]+i*ldv] = 1.0;
524 : }
525 88934 : for (i=0;i<n;i++) PetscCall(PetscBLASIntCast(i,&perm[i]));
526 : j = 0;
527 86273 : while (j<n-k) {
528 83518 : if (cmplxEig[j]==0) sz=1;
529 2661 : else sz=2;
530 83518 : PetscCall(TryHRIt(n,j,sz,V,ldv,R,ldr,s,&exg,&ok,&n0,&n1,&idx0,&idx1,NULL,work+nwu));
531 83518 : if (ok) {
532 83518 : if (exg) cmplxEig[j] = -cmplxEig[j];
533 83518 : j = j+sz;
534 : } else { /* to be discarded */
535 0 : k = k+1;
536 0 : if (cmplxEig[j]==0) {
537 0 : if (j<n) {
538 0 : t1 = perm[j];
539 0 : for (i=j;i<n-1;i++) perm[i] = perm[i+1];
540 0 : perm[n-1] = t1;
541 0 : t1 = cmplxEig[j];
542 0 : for (i=j;i<n-1;i++) cmplxEig[i] = cmplxEig[i+1];
543 0 : cmplxEig[n-1] = t1;
544 0 : PetscCall(PetscArraycpy(col1,R+j*ldr,n));
545 0 : for (i=j;i<n-1;i++) PetscCall(PetscArraycpy(R+i*ldr,R+(i+1)*ldr,n));
546 0 : PetscCall(PetscArraycpy(R+(n-1)*ldr,col1,n));
547 : }
548 : } else {
549 0 : k = k+1;
550 0 : if (j<n-1) {
551 0 : t1 = perm[j]; t2 = perm[j+1];
552 0 : for (i=j;i<n-2;i++) perm[i] = perm[i+2];
553 0 : perm[n-2] = t1; perm[n-1] = t2;
554 0 : t1 = cmplxEig[j]; t2 = cmplxEig[j+1];
555 0 : for (i=j;i<n-2;i++) cmplxEig[i] = cmplxEig[i+2];
556 0 : cmplxEig[n-2] = t1; cmplxEig[n-1] = t2;
557 0 : PetscCall(PetscArraycpy(col1,R+j*ldr,n));
558 0 : PetscCall(PetscArraycpy(col2,R+(j+1)*ldr,n));
559 0 : for (i=j;i<n-2;i++) PetscCall(PetscArraycpy(R+i*ldr,R+(i+2)*ldr,n));
560 0 : PetscCall(PetscArraycpy(R+(n-2)*ldr,col1,n));
561 86273 : PetscCall(PetscArraycpy(R+(n-1)*ldr,col2,n));
562 : }
563 : }
564 : }
565 : }
566 2755 : if (k!=0) {
567 0 : if (breakdown) *breakdown = PETSC_TRUE;
568 0 : *nv = n-k;
569 : }
570 2755 : PetscFunctionReturn(PETSC_SUCCESS);
571 : }
572 :
573 2753 : PetscErrorCode DSGHIEPOrthogEigenv(DS ds,DSMatType mat,PetscScalar *wr,PetscScalar *wi,PetscBool accum)
574 : {
575 2753 : PetscInt lws,nwus=0,nwui=0,lwi,off,n,nv,ld,i,ldr,l;
576 2753 : const PetscScalar *B,*W;
577 2753 : PetscScalar *Q,*X,*R,*ts,szero=0.0,sone=1.0;
578 2753 : PetscReal *s,vi,vr,tr,*d,*e;
579 2753 : PetscBLASInt ld_,n_,nv_,*perm,*cmplxEig;
580 :
581 2753 : PetscFunctionBegin;
582 2753 : l = ds->l;
583 2753 : n = ds->n-l;
584 2753 : PetscCall(PetscBLASIntCast(n,&n_));
585 2753 : ld = ds->ld;
586 2753 : PetscCall(PetscBLASIntCast(ld,&ld_));
587 2753 : off = l*ld+l;
588 2753 : PetscCall(DSGetArrayReal(ds,DS_MAT_D,&s));
589 2753 : if (!ds->compact) {
590 4 : PetscCall(MatDenseGetArrayRead(ds->omat[DS_MAT_B],&B));
591 44 : for (i=l;i<ds->n;i++) s[i] = PetscRealPart(B[i*ld+i]);
592 4 : PetscCall(MatDenseRestoreArrayRead(ds->omat[DS_MAT_B],&B));
593 : }
594 2753 : lws = n*n+7*n;
595 2753 : lwi = 2*n;
596 2753 : PetscCall(DSAllocateWork_Private(ds,lws,0,lwi));
597 2753 : R = ds->work+nwus;
598 2753 : nwus += n*n;
599 2753 : ldr = n;
600 2753 : perm = ds->iwork + nwui;
601 2753 : nwui += n;
602 2753 : cmplxEig = ds->iwork+nwui;
603 2753 : PetscCall(MatDenseGetArray(ds->omat[mat],&X));
604 86255 : for (i=0;i<n;i++) {
605 : #if defined(PETSC_USE_COMPLEX)
606 : vi = PetscImaginaryPart(wr[l+i]);
607 : #else
608 83502 : vi = wi?PetscRealPart(wi[l+i]):0.0;
609 : #endif
610 83502 : if (vi!=0) {
611 2661 : cmplxEig[i] = 1;
612 2661 : cmplxEig[i+1] = 2;
613 2661 : i++;
614 80841 : } else cmplxEig[i] = 0;
615 : }
616 2753 : nv = n;
617 :
618 : /* Perform HR decomposition */
619 : /* Hyperbolic rotators */
620 2753 : PetscCall(DSPseudoOrthog_HR(&nv,X+off,ld,s+l,R,ldr,perm,cmplxEig,NULL,ds->work+nwus));
621 : /* Sort wr,wi perm */
622 2753 : ts = ds->work+nwus;
623 2753 : PetscCall(PetscArraycpy(ts,wr+l,n));
624 88916 : for (i=0;i<n;i++) wr[i+l] = ts[perm[i]];
625 : #if !defined(PETSC_USE_COMPLEX)
626 2753 : if (wi) {
627 2753 : PetscCall(PetscArraycpy(ts,wi+l,n));
628 88916 : for (i=0;i<n;i++) wi[i+l] = ts[perm[i]];
629 : }
630 : #endif
631 : /* Projected Matrix */
632 2753 : PetscCall(DSGetArrayReal(ds,DS_MAT_T,&d));
633 2753 : PetscCall(PetscArrayzero(d+2*ld,ld));
634 2753 : e = d+ld;
635 2753 : d[l+nv-1] = PetscRealPart(wr[l+nv-1]*s[l+nv-1]);
636 83523 : for (i=0;i<nv-1;i++) {
637 80770 : if (cmplxEig[i]==0) { /* Real */
638 78109 : d[l+i] = PetscRealPart(wr[l+i]*s[l+i]);
639 78109 : e[l+i] = 0.0;
640 : } else {
641 2661 : vr = PetscRealPart(wr[l+i]);
642 : #if defined(PETSC_USE_COMPLEX)
643 : vi = PetscImaginaryPart(wr[l+i]);
644 : #else
645 2661 : vi = wi?PetscRealPart(wi[l+i]):0.0;
646 : #endif
647 2661 : if (cmplxEig[i]==-1) vi = -vi;
648 2661 : tr = PetscRealPart((R[i+(i+1)*ldr]/R[i+i*ldr]))*vi;
649 2661 : d[l+i] = (vr-tr)*s[l+i];
650 2661 : d[l+i+1] = (vr+tr)*s[l+i+1];
651 2661 : e[l+i] = PetscRealPart(s[l+i]*(R[(i+1)+(i+1)*ldr]/R[i+i*ldr])*vi);
652 2661 : if (i<nv-2) e[l+i+1] = 0.0;
653 : i++;
654 : }
655 : }
656 2753 : PetscCall(DSRestoreArrayReal(ds,DS_MAT_T,&d));
657 2753 : PetscCall(DSRestoreArrayReal(ds,DS_MAT_D,&s));
658 : /* accumulate previous Q */
659 2753 : if (accum) {
660 2750 : PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Q],&Q));
661 2750 : PetscCall(PetscBLASIntCast(nv,&nv_));
662 2750 : PetscCall(DSAllocateMat_Private(ds,DS_MAT_W));
663 2750 : PetscCall(MatCopy(ds->omat[DS_MAT_Q],ds->omat[DS_MAT_W],SAME_NONZERO_PATTERN));
664 2750 : PetscCall(MatDenseGetArrayRead(ds->omat[DS_MAT_W],&W));
665 2750 : PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&sone,W+off,&ld_,X+off,&ld_,&szero,Q+off,&ld_));
666 2750 : PetscCall(MatDenseRestoreArrayRead(ds->omat[DS_MAT_W],&W));
667 2750 : PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q));
668 : }
669 2753 : ds->t = nv+l;
670 2753 : PetscCall(MatDenseRestoreArray(ds->omat[mat],&X));
671 2753 : if (!ds->compact) PetscCall(DSSwitchFormat_GHIEP(ds,PETSC_FALSE));
672 2753 : PetscFunctionReturn(PETSC_SUCCESS);
673 : }
674 :
675 : /*
676 : Reduce to tridiagonal-diagonal pair by means of TridiagDiag_HHR.
677 : */
678 2753 : PetscErrorCode DSIntermediate_GHIEP(DS ds)
679 : {
680 2753 : PetscInt i,ld,off;
681 2753 : PetscInt nwall,nwallr,nwalli;
682 2753 : PetscScalar *A,*B,*Q;
683 2753 : PetscReal *d,*e,*s;
684 :
685 2753 : PetscFunctionBegin;
686 2753 : ld = ds->ld;
687 2753 : PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
688 2753 : PetscCall(MatDenseGetArray(ds->omat[DS_MAT_B],&B));
689 2753 : PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Q],&Q));
690 2753 : PetscCall(DSGetArrayReal(ds,DS_MAT_T,&d));
691 2753 : PetscCall(DSGetArrayReal(ds,DS_MAT_D,&s));
692 2753 : e = d+ld;
693 2753 : off = ds->l+ds->l*ld;
694 2753 : PetscCall(PetscArrayzero(Q,ld*ld));
695 2753 : nwall = ld*ld+ld;
696 2753 : nwallr = ld;
697 2753 : nwalli = ld;
698 2753 : PetscCall(DSAllocateWork_Private(ds,nwall,nwallr,nwalli));
699 90711 : for (i=0;i<ds->n;i++) Q[i+i*ld]=1.0;
700 88916 : for (i=0;i<ds->n-ds->l;i++) *(ds->perm+i)=i;
701 2753 : if (ds->compact) {
702 2749 : if (ds->state < DS_STATE_INTERMEDIATE) {
703 2699 : PetscCall(DSSwitchFormat_GHIEP(ds,PETSC_FALSE));
704 2699 : PetscCall(TridiagDiag_HHR(ds->k-ds->l+1,A+off,ld,s+ds->l,Q+off,ld,PETSC_TRUE,d+ds->l,e+ds->l,ds->perm,ds->work,ds->rwork,ds->iwork));
705 2699 : ds->k = ds->l;
706 2699 : PetscCall(PetscArrayzero(d+2*ld+ds->l,ds->n-ds->l));
707 : }
708 : } else {
709 4 : if (ds->state < DS_STATE_INTERMEDIATE) {
710 44 : for (i=0;i<ds->n;i++) s[i] = PetscRealPart(B[i+i*ld]);
711 4 : PetscCall(TridiagDiag_HHR(ds->n-ds->l,A+off,ld,s+ds->l,Q+off,ld,PETSC_FALSE,d+ds->l,e+ds->l,ds->perm,ds->work,ds->rwork,ds->iwork));
712 4 : PetscCall(PetscArrayzero(d+2*ld,ds->n));
713 4 : ds->k = ds->l;
714 4 : PetscCall(DSSwitchFormat_GHIEP(ds,PETSC_FALSE));
715 0 : } else PetscCall(DSSwitchFormat_GHIEP(ds,PETSC_TRUE));
716 : }
717 2753 : PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
718 2753 : PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_B],&B));
719 2753 : PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q));
720 2753 : PetscCall(DSRestoreArrayReal(ds,DS_MAT_T,&d));
721 2753 : PetscCall(DSRestoreArrayReal(ds,DS_MAT_D,&s));
722 2753 : PetscFunctionReturn(PETSC_SUCCESS);
723 : }
|