Actual source code: invit.c
slepc-3.22.2 2024-12-02
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
11: #include <slepc/private/dsimpl.h>
12: #include <slepcblaslapack.h>
14: struct HRtr
15: {
16: PetscScalar *data;
17: PetscInt m;
18: PetscInt idx[2];
19: PetscInt n[2];
20: PetscScalar tau[2];
21: PetscReal alpha;
22: PetscReal cs;
23: PetscReal sn;
24: PetscInt type;
25: };
27: /*
28: Generates a hyperbolic rotation
29: if x1*x1 - x2*x2 != 0
30: r = sqrt(|x1*x1 - x2*x2|)
31: c = x1/r s = x2/r
33: | c -s||x1| |d*r|
34: |-s c||x2| = | 0 |
35: where d = 1 for type==1 and -1 for type==2
36: Returns the condition number of the reduction
37: */
38: static PetscErrorCode HRGen(PetscReal x1,PetscReal x2,PetscInt *type,PetscReal *c,PetscReal *s,PetscReal *r,PetscReal *cond)
39: {
40: PetscReal t,n2,xa,xb;
41: PetscInt type_;
43: PetscFunctionBegin;
44: if (x2==0.0) {
45: *r = PetscAbsReal(x1); *c = (x1>=0.0)?1.0:-1.0; *s = 0.0;
46: if (type) *type = 1;
47: PetscFunctionReturn(PETSC_SUCCESS);
48: }
49: if (PetscAbsReal(x1) == PetscAbsReal(x2)) {
50: /* hyperbolic rotation doesn't exist */
51: *c = *s = *r = 0.0;
52: if (type) *type = 0;
53: *cond = PETSC_MAX_REAL;
54: PetscFunctionReturn(PETSC_SUCCESS);
55: }
57: if (PetscAbsReal(x1)>PetscAbsReal(x2)) {
58: xa = x1; xb = x2; type_ = 1;
59: } else {
60: xa = x2; xb = x1; type_ = 2;
61: }
62: t = xb/xa;
63: n2 = PetscAbsReal(1 - t*t);
64: *r = PetscSqrtReal(n2)*PetscAbsReal(xa);
65: *c = x1/(*r);
66: *s = x2/(*r);
67: if (type_ == 2) *r *= -1;
68: if (type) *type = type_;
69: if (cond) *cond = (PetscAbsReal(*c) + PetscAbsReal(*s))/PetscAbsReal(PetscAbsReal(*c) - PetscAbsReal(*s));
70: PetscFunctionReturn(PETSC_SUCCESS);
71: }
73: /*
74: |c s|
75: Applies an hyperbolic rotator |s c|
76: |c s|
77: [x1 x2]|s c|
78: */
79: static PetscErrorCode HRApply(PetscInt n,PetscScalar *x1,PetscInt inc1,PetscScalar *x2,PetscInt inc2,PetscReal c,PetscReal s)
80: {
81: PetscInt i;
82: PetscReal t;
83: PetscScalar tmp;
85: PetscFunctionBegin;
86: if (PetscAbsReal(c)>PetscAbsReal(s)) { /* Type I */
87: t = s/c;
88: for (i=0;i<n;i++) {
89: x1[i*inc1] = c*x1[i*inc1] + s*x2[i*inc2];
90: x2[i*inc2] = t*x1[i*inc1] + x2[i*inc2]/c;
91: }
92: } else { /* Type II */
93: t = c/s;
94: for (i=0;i<n;i++) {
95: tmp = x1[i*inc1];
96: x1[i*inc1] = c*x1[i*inc1] + s*x2[i*inc2];
97: x2[i*inc2] = t*x1[i*inc1] + tmp/s;
98: }
99: }
100: PetscFunctionReturn(PETSC_SUCCESS);
101: }
103: /*
104: Reduction to tridiagonal-diagonal form (see F. Tisseur, SIMAX 26(1), 2004).
106: Input:
107: A symmetric (only lower triangular part is referred)
108: s vector +1 and -1 (signature matrix)
109: Output:
110: d,e
111: s
112: Q s-orthogonal matrix with Q^T*A*Q = T (symmetric tridiagonal matrix)
113: */
114: static PetscErrorCode TridiagDiag_HHR(PetscInt n,PetscScalar *A,PetscInt lda,PetscReal *s,PetscScalar* Q,PetscInt ldq,PetscBool flip,PetscReal *d,PetscReal *e,PetscInt *perm_,PetscScalar *work,PetscReal *rwork,PetscBLASInt *iwork)
115: {
116: PetscInt i,j,k,*ii,*jj,i0=0,ik=0,type;
117: PetscInt nwu=0;
118: PetscReal *ss,cond=1.0,cs,sn,r;
119: PetscScalar tau,t,*AA;
120: PetscBLASInt n0,n1,ni,inc=1,m,n_,lda_,ldq_,*perm,tmp;
121: PetscBool breakdown = PETSC_TRUE;
123: PetscFunctionBegin;
124: if (n<3) {
125: if (n==1) Q[0]=1;
126: if (n==2) {
127: Q[0] = Q[1+ldq] = 1;
128: Q[1] = Q[ldq] = 0;
129: }
130: PetscFunctionReturn(PETSC_SUCCESS);
131: }
132: PetscCall(PetscBLASIntCast(lda,&lda_));
133: PetscCall(PetscBLASIntCast(n,&n_));
134: PetscCall(PetscBLASIntCast(ldq,&ldq_));
135: ss = rwork;
136: perm = iwork;
137: AA = work;
138: for (i=0;i<n;i++) PetscCall(PetscArraycpy(AA+i*n,A+i*lda,n));
139: nwu += n*n;
140: k=0;
141: while (breakdown && k<n) {
142: breakdown = PETSC_FALSE;
143: /* Classify (and flip) A and s according to sign */
144: if (flip) {
145: for (i=0;i<n;i++) {
146: PetscCall(PetscBLASIntCast(n-1-perm_[i],&perm[i]));
147: if (perm[i]==0) i0 = i;
148: if (perm[i]==k) ik = i;
149: }
150: } else {
151: for (i=0;i<n;i++) {
152: PetscCall(PetscBLASIntCast(perm_[i],&perm[i]));
153: if (perm[i]==0) i0 = i;
154: if (perm[i]==k) ik = i;
155: }
156: }
157: perm[ik] = 0;
158: PetscCall(PetscBLASIntCast(k,&perm[i0]));
159: i=1;
160: while (i<n-1 && s[perm[i-1]]==s[perm[0]]) {
161: if (s[perm[i]]!=s[perm[0]]) {
162: j=i+1;
163: while (j<n-1 && s[perm[j]]!=s[perm[0]])j++;
164: tmp = perm[i]; perm[i] = perm[j]; perm[j] = tmp;
165: }
166: i++;
167: }
168: for (i=0;i<n;i++) {
169: ss[i] = s[perm[i]];
170: }
171: if (flip) {
172: ii = &j;
173: jj = &i;
174: } else {
175: ii = &i;
176: jj = &j;
177: }
178: for (i=0;i<n;i++)
179: for (j=0;j<n;j++)
180: A[i+j*lda] = AA[perm[*ii]+perm[*jj]*n];
181: /* Initialize Q */
182: for (i=0;i<n;i++) {
183: PetscCall(PetscArrayzero(Q+i*ldq,n));
184: Q[perm[i]+i*ldq] = 1.0;
185: }
186: for (ni=1;ni<n && ss[ni]==ss[0]; ni++);
187: n0 = ni-1;
188: n1 = n_-ni;
189: for (j=0;j<n-2;j++) {
190: PetscCall(PetscBLASIntCast(n-j-1,&m));
191: /* Forming and applying reflectors */
192: if (n0 > 1) {
193: PetscCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n0,A+ni-n0+j*lda,A+ni-n0+j*lda+1,&inc,&tau));
194: /* Apply reflector */
195: if (PetscAbsScalar(tau) != 0.0) {
196: t=*(A+ni-n0+j*lda); *(A+ni-n0+j*lda)=1.0;
197: PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&m,&n0,A+ni-n0+j*lda,&inc,&tau,A+j+1+(j+1)*lda,&lda_,work+nwu));
198: PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0,&m,A+ni-n0+j*lda,&inc,&tau,A+j+1+(j+1)*lda,&lda_,work+nwu));
199: /* Update Q */
200: PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n0,A+ni-n0+j*lda,&inc,&tau,Q+(j+1)*ldq,&ldq_,work+nwu));
201: *(A+ni-n0+j*lda) = t;
202: for (i=1;i<n0;i++) {
203: *(A+ni-n0+j*lda+i) = 0.0; *(A+j+(ni-n0+i)*lda) = 0.0;
204: }
205: *(A+j+(ni-n0)*lda) = *(A+ni-n0+j*lda);
206: }
207: }
208: if (n1 > 1) {
209: PetscCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n1,A+n-n1+j*lda,A+n-n1+j*lda+1,&inc,&tau));
210: /* Apply reflector */
211: if (PetscAbsScalar(tau) != 0.0) {
212: t=*(A+n-n1+j*lda); *(A+n-n1+j*lda)=1.0;
213: PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&m,&n1,A+n-n1+j*lda,&inc,&tau,A+j+1+(n-n1)*lda,&lda_,work+nwu));
214: PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n1,&m,A+n-n1+j*lda,&inc,&tau,A+n-n1+(j+1)*lda,&lda_,work+nwu));
215: /* Update Q */
216: PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n1,A+n-n1+j*lda,&inc,&tau,Q+(n-n1)*ldq,&ldq_,work+nwu));
217: *(A+n-n1+j*lda) = t;
218: for (i=1;i<n1;i++) {
219: *(A+n-n1+i+j*lda) = 0.0; *(A+j+(n-n1+i)*lda) = 0.0;
220: }
221: *(A+j+(n-n1)*lda) = *(A+n-n1+j*lda);
222: }
223: }
224: /* Hyperbolic rotation */
225: if (n0 > 0 && n1 > 0) {
226: PetscCall(HRGen(PetscRealPart(A[ni-n0+j*lda]),PetscRealPart(A[n-n1+j*lda]),&type,&cs,&sn,&r,&cond));
227: /* Check condition number */
228: if (cond > 1.0/(10*PETSC_SQRT_MACHINE_EPSILON)) {
229: breakdown = PETSC_TRUE;
230: k++;
231: PetscCheck(k<n && !flip,PETSC_COMM_SELF,PETSC_ERR_SUP,"Breakdown in construction of hyperbolic transformation");
232: break;
233: }
234: A[ni-n0+j*lda] = r; A[n-n1+j*lda] = 0.0;
235: A[j+(ni-n0)*lda] = r; A[j+(n-n1)*lda] = 0.0;
236: /* Apply to A */
237: PetscCall(HRApply(m,A+j+1+(ni-n0)*lda,1,A+j+1+(n-n1)*lda,1,cs,-sn));
238: PetscCall(HRApply(m,A+ni-n0+(j+1)*lda,lda,A+n-n1+(j+1)*lda,lda,cs,-sn));
240: /* Update Q */
241: PetscCall(HRApply(n,Q+(ni-n0)*ldq,1,Q+(n-n1)*ldq,1,cs,-sn));
242: if (type==2) {
243: ss[ni-n0] = -ss[ni-n0]; ss[n-n1] = -ss[n-n1];
244: n0++;ni++;n1--;
245: }
246: }
247: if (n0>0) n0--;
248: else n1--;
249: }
250: }
252: /* flip matrices */
253: if (flip) {
254: for (i=0;i<n-1;i++) {
255: d[i] = PetscRealPart(A[n-i-1+(n-i-1)*lda]);
256: e[i] = PetscRealPart(A[n-i-1+(n-i-2)*lda]);
257: s[i] = ss[n-i-1];
258: }
259: s[n-1] = ss[0];
260: d[n-1] = PetscRealPart(A[0]);
261: for (i=0;i<n;i++) PetscCall(PetscArraycpy(work+i*n,Q+i*ldq,n));
262: for (i=0;i<n;i++)
263: for (j=0;j<n;j++)
264: Q[i+j*ldq] = work[i+(n-j-1)*n];
265: } else {
266: for (i=0;i<n-1;i++) {
267: d[i] = PetscRealPart(A[i+i*lda]);
268: e[i] = PetscRealPart(A[i+1+i*lda]);
269: s[i] = ss[i];
270: }
271: s[n-1] = ss[n-1];
272: d[n-1] = PetscRealPart(A[n-1 + (n-1)*lda]);
273: }
274: PetscFunctionReturn(PETSC_SUCCESS);
275: }
277: static PetscErrorCode MadeHRtr(PetscInt sz,PetscInt n,PetscInt idx0,PetscInt n0,PetscInt idx1,PetscInt n1,struct HRtr *tr1,struct HRtr *tr2,PetscReal *ncond,PetscScalar *work)
278: {
279: PetscScalar *x,*y;
280: PetscReal ncond2=1.0;
281: PetscBLASInt n0_,n1_,inc=1;
283: PetscFunctionBegin;
284: /* Hyperbolic transformation to make zeros in x */
285: x = tr1->data;
286: tr1->n[0] = n0;
287: tr1->n[1] = n1;
288: tr1->idx[0] = idx0;
289: tr1->idx[1] = idx1;
290: PetscCall(PetscBLASIntCast(tr1->n[0],&n0_));
291: PetscCall(PetscBLASIntCast(tr1->n[1],&n1_));
292: if (tr1->n[0] > 1) PetscCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n0_,x+tr1->idx[0],x+tr1->idx[0]+1,&inc,tr1->tau));
293: if (tr1->n[1]> 1) PetscCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n1_,x+tr1->idx[1],x+tr1->idx[1]+1,&inc,tr1->tau+1));
294: if (tr1->idx[0]<tr1->idx[1]) PetscCall(HRGen(PetscRealPart(x[tr1->idx[0]]),PetscRealPart(x[tr1->idx[1]]),&tr1->type,&tr1->cs,&tr1->sn,&tr1->alpha,ncond));
295: else {
296: tr1->alpha = PetscRealPart(x[tr1->idx[0]]);
297: *ncond = 1.0;
298: }
299: if (sz==2) {
300: y = tr2->data;
301: /* Apply first transformation to second column */
302: if (tr1->n[0] > 1 && PetscAbsScalar(tr1->tau[0])!=0.0) {
303: x[tr1->idx[0]] = 1.0;
304: PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0_,&inc,x+tr1->idx[0],&inc,tr1->tau,y+tr1->idx[0],&n0_,work));
305: }
306: if (tr1->n[1] > 1 && PetscAbsScalar(tr1->tau[1])!=0.0) {
307: x[tr1->idx[1]] = 1.0;
308: PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n1_,&inc,x+tr1->idx[1],&inc,tr1->tau+1,y+tr1->idx[1],&n1_,work));
309: }
310: if (tr1->idx[0]<tr1->idx[1]) PetscCall(HRApply(1,y+tr1->idx[0],1,y+tr1->idx[1],1,tr1->cs,-tr1->sn));
311: tr2->n[0] = tr1->n[0];
312: tr2->n[1] = tr1->n[1];
313: tr2->idx[0] = tr1->idx[0];
314: tr2->idx[1] = tr1->idx[1];
315: if (tr1->idx[0]<tr1->idx[1] && tr1->type==2) {
316: tr2->idx[1]++; tr2->n[1]--; tr2->n[0]++;
317: }
318: if (tr2->n[0]>0) {
319: tr2->n[0]--; tr2->idx[0]++;
320: if (tr2->n[1]==0) tr2->idx[1] = tr2->idx[0];
321: } else {
322: tr2->n[1]--; tr2->idx[1]++; tr2->idx[0] = tr2->idx[1];
323: }
324: /* Hyperbolic transformation to make zeros in y */
325: PetscCall(PetscBLASIntCast(tr2->n[0],&n0_));
326: PetscCall(PetscBLASIntCast(tr2->n[1],&n1_));
327: if (tr2->n[0] > 1) PetscCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n0_,y+tr2->idx[0],y+tr2->idx[0]+1,&inc,tr2->tau));
328: if (tr2->n[1]> 1) PetscCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n1_,y+tr2->idx[1],y+tr2->idx[1]+1,&inc,tr2->tau+1));
329: if (tr2->idx[0]<tr2->idx[1]) PetscCall(HRGen(PetscRealPart(y[tr2->idx[0]]),PetscRealPart(y[tr2->idx[1]]),&tr2->type,&tr2->cs,&tr2->sn,&tr2->alpha,&ncond2));
330: else {
331: tr2->alpha = PetscRealPart(y[tr2->idx[0]]);
332: ncond2 = 1.0;
333: }
334: if (ncond2>*ncond) *ncond = ncond2;
335: }
336: PetscFunctionReturn(PETSC_SUCCESS);
337: }
339: /*
340: Auxiliary function to try perform one iteration of hr routine,
341: checking condition number. If it is < tolD, apply the
342: transformation to H and R, if not, ok=false and it do nothing
343: tolE, tolerance to exchange complex pairs to improve conditioning
344: */
345: static PetscErrorCode TryHRIt(PetscInt n,PetscInt j,PetscInt sz,PetscScalar *H,PetscInt ldh,PetscScalar *R,PetscInt ldr,PetscReal *s,PetscBool *exg,PetscBool *ok,PetscInt *n0,PetscInt *n1,PetscInt *idx0,PetscInt *idx1,PetscReal *cond,PetscScalar *work)
346: {
347: struct HRtr *tr1,*tr2,tr1_t,tr2_t,tr1_te,tr2_te;
348: PetscScalar *x,*y;
349: PetscReal ncond=0,ncond_e;
350: PetscInt nwu=0,i,d=1;
351: PetscBLASInt n0_,n1_,inc=1,mh,mr,n_,ldr_,ldh_;
352: PetscReal tolD = 1e+5;
354: PetscFunctionBegin;
355: if (cond) *cond = 1.0;
356: PetscCall(PetscBLASIntCast(n,&n_));
357: PetscCall(PetscBLASIntCast(ldr,&ldr_));
358: PetscCall(PetscBLASIntCast(ldh,&ldh_));
359: x = work+nwu;
360: nwu += n;
361: PetscCall(PetscArraycpy(x,R+j*ldr,n));
362: *exg = PETSC_FALSE;
363: *ok = PETSC_TRUE;
364: tr1_t.data = x;
365: if (sz==1) {
366: /* Hyperbolic transformation to make zeros in x */
367: PetscCall(MadeHRtr(sz,n,*idx0,*n0,*idx1,*n1,&tr1_t,NULL,&ncond,work+nwu));
368: /* Check condition number to single column*/
369: if (ncond>tolD) *ok = PETSC_FALSE;
370: tr1 = &tr1_t;
371: tr2 = &tr2_t;
372: } else {
373: y = work+nwu;
374: nwu += n;
375: PetscCall(PetscArraycpy(y,R+(j+1)*ldr,n));
376: tr2_t.data = y;
377: PetscCall(MadeHRtr(sz,n,*idx0,*n0,*idx1,*n1,&tr1_t,&tr2_t,&ncond,work+nwu));
378: /* Computing hyperbolic transformations also for exchanged vectors */
379: tr1_te.data = work+nwu;
380: nwu += n;
381: PetscCall(PetscArraycpy(tr1_te.data,R+(j+1)*ldr,n));
382: tr2_te.data = work+nwu;
383: nwu += n;
384: PetscCall(PetscArraycpy(tr2_te.data,R+j*ldr,n));
385: PetscCall(MadeHRtr(sz,n,*idx0,*n0,*idx1,*n1,&tr1_te,&tr2_te,&ncond_e,work+nwu));
386: if (ncond > d*ncond_e) {
387: *exg = PETSC_TRUE;
388: tr1 = &tr1_te;
389: tr2 = &tr2_te;
390: ncond = ncond_e;
391: } else {
392: tr1 = &tr1_t;
393: tr2 = &tr2_t;
394: }
395: if (ncond>tolD) *ok = PETSC_FALSE;
396: }
397: if (*ok) {
398: /* Everything is OK, apply transformations to R and H */
399: /* First column */
400: if (cond && *cond<ncond) *cond = ncond;
401: x = tr1->data;
402: PetscCall(PetscBLASIntCast(tr1->n[0],&n0_));
403: PetscCall(PetscBLASIntCast(tr1->n[1],&n1_));
404: PetscCall(PetscBLASIntCast(n-j-sz,&mr));
405: if (tr1->n[0] > 1 && PetscAbsScalar(tr1->tau[0])!=0.0) {
406: x[tr1->idx[0]] = 1.0;
407: PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0_,&mr,x+tr1->idx[0],&inc,tr1->tau,R+(j+sz)*ldr+tr1->idx[0],&ldr_,work+nwu));
408: PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n0_,x+tr1->idx[0],&inc,tr1->tau,H+(tr1->idx[0])*ldh,&ldh_,work+nwu));
409: }
410: if (tr1->n[1] > 1 && PetscAbsScalar(tr1->tau[1])!=0.0) {
411: x[tr1->idx[1]] = 1.0;
412: PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n1_,&mr,x+tr1->idx[1],&inc,tr1->tau+1,R+(j+sz)*ldr+tr1->idx[1],&ldr_,work+nwu));
413: PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n1_,x+tr1->idx[1],&inc,tr1->tau+1,H+(tr1->idx[1])*ldh,&ldh_,work+nwu));
414: }
415: if (tr1->idx[0]<tr1->idx[1]) {
416: PetscCall(HRApply(mr,R+(j+sz)*ldr+tr1->idx[0],ldr,R+(j+sz)*ldr+tr1->idx[1],ldr,tr1->cs,-tr1->sn));
417: if (tr1->type==1) PetscCall(HRApply(n,H+(tr1->idx[0])*ldh,1,H+(tr1->idx[1])*ldh,1,tr1->cs,tr1->sn));
418: else {
419: PetscCall(HRApply(n,H+(tr1->idx[0])*ldh,1,H+(tr1->idx[1])*ldh,1,-tr1->cs,-tr1->sn));
420: s[tr1->idx[0]] = -s[tr1->idx[0]];
421: s[tr1->idx[1]] = -s[tr1->idx[1]];
422: }
423: }
424: for (i=0;i<tr1->idx[0];i++) *(R+j*ldr+i) = x[i];
425: for (i=tr1->idx[0]+1;i<n;i++) *(R+j*ldr+i) = 0.0;
426: *(R+j*ldr+tr1->idx[0]) = tr1->alpha;
427: if (sz==2) {
428: y = tr2->data;
429: /* Second column */
430: PetscCall(PetscBLASIntCast(tr2->n[0],&n0_));
431: PetscCall(PetscBLASIntCast(tr2->n[1],&n1_));
432: PetscCall(PetscBLASIntCast(n-j-sz,&mr));
433: PetscCall(PetscBLASIntCast(n-tr2->idx[0],&mh));
434: if (tr2->n[0] > 1 && PetscAbsScalar(tr2->tau[0])!=0.0) {
435: y[tr2->idx[0]] = 1.0;
436: PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0_,&mr,y+tr2->idx[0],&inc,tr2->tau,R+(j+2)*ldr+tr2->idx[0],&ldr_,work+nwu));
437: PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n0_,y+tr2->idx[0],&inc,tr2->tau,H+(tr2->idx[0])*ldh,&ldh_,work+nwu));
438: }
439: if (tr2->n[1] > 1 && PetscAbsScalar(tr2->tau[1])!=0.0) {
440: y[tr2->idx[1]] = 1.0;
441: PetscCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n1_,&mr,y+tr2->idx[1],&inc,tr2->tau+1,R+(j+2)*ldr+tr2->idx[1],&ldr_,work+nwu));
442: PetscCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n1_,y+tr2->idx[1],&inc,tr2->tau+1,H+(tr2->idx[1])*ldh,&ldh_,work+nwu));
443: }
444: if (tr2->idx[0]<tr2->idx[1]) {
445: PetscCall(HRApply(mr,R+(j+2)*ldr+tr2->idx[0],ldr,R+(j+2)*ldr+tr2->idx[1],ldr,tr2->cs,-tr2->sn));
446: if (tr2->type==1) PetscCall(HRApply(n,H+(tr2->idx[0])*ldh,1,H+(tr2->idx[1])*ldh,1,tr2->cs,tr2->sn));
447: else {
448: PetscCall(HRApply(n,H+(tr2->idx[0])*ldh,1,H+(tr2->idx[1])*ldh,1,-tr2->cs,-tr2->sn));
449: s[tr2->idx[0]] = -s[tr2->idx[0]];
450: s[tr2->idx[1]] = -s[tr2->idx[1]];
451: }
452: }
453: for (i=0;i<tr2->idx[0]-1;i++) *(R+(j+1)*ldr+i) = y[i];
454: *(R+(j+1)*ldr+tr2->idx[0]-1) = y[tr2->idx[0]-1];
455: for (i=tr2->idx[0]+1;i<n;i++) *(R+(j+1)*ldr+i) = 0.0;
456: *(R+(j+1)*ldr+tr2->idx[0]) = tr2->alpha;
457: *n0 = tr2->n[0];
458: *n1 = tr2->n[1];
459: *idx0 = tr2->idx[0];
460: *idx1 = tr2->idx[1];
461: if (tr2->idx[0]<tr2->idx[1] && tr2->type==2) {
462: (*idx1)++; (*n1)--; (*n0)++;
463: }
464: } else {
465: *n0 = tr1->n[0];
466: *n1 = tr1->n[1];
467: *idx0 = tr1->idx[0];
468: *idx1 = tr1->idx[1];
469: if (tr1->idx[0]<tr1->idx[1] && tr1->type==2) {
470: (*idx1)++; (*n1)--; (*n0)++;
471: }
472: }
473: if (*n0>0) {
474: (*n0)--; (*idx0)++;
475: if (*n1==0) *idx1 = *idx0;
476: } else {
477: (*n1)--; (*idx1)++; *idx0 = *idx1;
478: }
479: }
480: PetscFunctionReturn(PETSC_SUCCESS);
481: }
483: /*
484: compute V = HR whit H s-orthogonal and R upper triangular
485: (work: work space of minimum size 6*nv)
486: */
487: PetscErrorCode DSPseudoOrthog_HR(PetscInt *nv,PetscScalar *V,PetscInt ldv,PetscReal *s,PetscScalar *R,PetscInt ldr,PetscBLASInt *perm,PetscBLASInt *cmplxEig,PetscBool *breakdown,PetscScalar *work)
488: {
489: PetscInt i,j,n,n0,n1,np,idx0,idx1,sz=1,k=0,nwu=0;
490: PetscBLASInt t1,t2;
491: PetscScalar *col1,*col2;
492: PetscBool exg=PETSC_FALSE,ok=PETSC_FALSE;
494: PetscFunctionBegin;
495: n = *nv;
496: col1 = work+nwu;
497: nwu += n;
498: col2 = work+nwu;
499: nwu += n;
500: /* Sort R and s according to sing(s) */
501: np = 0;
502: for (i=0;i<n;i++) if (s[i]>0) np++;
503: if (s[0]>0) n1 = np;
504: else n1 = n-np;
505: n0 = 0;
506: for (i=0;i<n;i++) {
507: if (s[i]==s[0]) {
508: s[n0] = s[0];
509: PetscCall(PetscBLASIntCast(i,&perm[n0++]));
510: } else PetscCall(PetscBLASIntCast(i,&perm[n1++]));
511: }
512: for (i=n0;i<n;i++) s[i] = -s[0];
513: n1 -= n0;
514: idx0 = 0;
515: idx1 = n0;
516: if (idx1==n) idx1=idx0;
517: for (i=0;i<n;i++) {
518: for (j=0;j<n;j++) R[j*ldr+i] = V[j*ldv+perm[i]];
519: }
520: /* Initialize H */
521: for (i=0;i<n;i++) {
522: PetscCall(PetscArrayzero(V+i*ldv,n));
523: V[perm[i]+i*ldv] = 1.0;
524: }
525: for (i=0;i<n;i++) PetscCall(PetscBLASIntCast(i,&perm[i]));
526: j = 0;
527: while (j<n-k) {
528: if (cmplxEig[j]==0) sz=1;
529: else sz=2;
530: PetscCall(TryHRIt(n,j,sz,V,ldv,R,ldr,s,&exg,&ok,&n0,&n1,&idx0,&idx1,NULL,work+nwu));
531: if (ok) {
532: if (exg) cmplxEig[j] = -cmplxEig[j];
533: j = j+sz;
534: } else { /* to be discarded */
535: k = k+1;
536: if (cmplxEig[j]==0) {
537: if (j<n) {
538: t1 = perm[j];
539: for (i=j;i<n-1;i++) perm[i] = perm[i+1];
540: perm[n-1] = t1;
541: t1 = cmplxEig[j];
542: for (i=j;i<n-1;i++) cmplxEig[i] = cmplxEig[i+1];
543: cmplxEig[n-1] = t1;
544: PetscCall(PetscArraycpy(col1,R+j*ldr,n));
545: for (i=j;i<n-1;i++) PetscCall(PetscArraycpy(R+i*ldr,R+(i+1)*ldr,n));
546: PetscCall(PetscArraycpy(R+(n-1)*ldr,col1,n));
547: }
548: } else {
549: k = k+1;
550: if (j<n-1) {
551: t1 = perm[j]; t2 = perm[j+1];
552: for (i=j;i<n-2;i++) perm[i] = perm[i+2];
553: perm[n-2] = t1; perm[n-1] = t2;
554: t1 = cmplxEig[j]; t2 = cmplxEig[j+1];
555: for (i=j;i<n-2;i++) cmplxEig[i] = cmplxEig[i+2];
556: cmplxEig[n-2] = t1; cmplxEig[n-1] = t2;
557: PetscCall(PetscArraycpy(col1,R+j*ldr,n));
558: PetscCall(PetscArraycpy(col2,R+(j+1)*ldr,n));
559: for (i=j;i<n-2;i++) PetscCall(PetscArraycpy(R+i*ldr,R+(i+2)*ldr,n));
560: PetscCall(PetscArraycpy(R+(n-2)*ldr,col1,n));
561: PetscCall(PetscArraycpy(R+(n-1)*ldr,col2,n));
562: }
563: }
564: }
565: }
566: if (k!=0) {
567: if (breakdown) *breakdown = PETSC_TRUE;
568: *nv = n-k;
569: }
570: PetscFunctionReturn(PETSC_SUCCESS);
571: }
573: PetscErrorCode DSGHIEPOrthogEigenv(DS ds,DSMatType mat,PetscScalar *wr,PetscScalar *wi,PetscBool accum)
574: {
575: PetscInt lws,nwus=0,nwui=0,lwi,off,n,nv,ld,i,ldr,l;
576: const PetscScalar *B,*W;
577: PetscScalar *Q,*X,*R,*ts,szero=0.0,sone=1.0;
578: PetscReal *s,vi,vr,tr,*d,*e;
579: PetscBLASInt ld_,n_,nv_,*perm,*cmplxEig;
581: PetscFunctionBegin;
582: l = ds->l;
583: n = ds->n-l;
584: PetscCall(PetscBLASIntCast(n,&n_));
585: ld = ds->ld;
586: PetscCall(PetscBLASIntCast(ld,&ld_));
587: off = l*ld+l;
588: PetscCall(DSGetArrayReal(ds,DS_MAT_D,&s));
589: if (!ds->compact) {
590: PetscCall(MatDenseGetArrayRead(ds->omat[DS_MAT_B],&B));
591: for (i=l;i<ds->n;i++) s[i] = PetscRealPart(B[i*ld+i]);
592: PetscCall(MatDenseRestoreArrayRead(ds->omat[DS_MAT_B],&B));
593: }
594: lws = n*n+7*n;
595: lwi = 2*n;
596: PetscCall(DSAllocateWork_Private(ds,lws,0,lwi));
597: R = ds->work+nwus;
598: nwus += n*n;
599: ldr = n;
600: perm = ds->iwork + nwui;
601: nwui += n;
602: cmplxEig = ds->iwork+nwui;
603: PetscCall(MatDenseGetArray(ds->omat[mat],&X));
604: for (i=0;i<n;i++) {
605: #if defined(PETSC_USE_COMPLEX)
606: vi = PetscImaginaryPart(wr[l+i]);
607: #else
608: vi = wi?PetscRealPart(wi[l+i]):0.0;
609: #endif
610: if (vi!=0) {
611: cmplxEig[i] = 1;
612: cmplxEig[i+1] = 2;
613: i++;
614: } else cmplxEig[i] = 0;
615: }
616: nv = n;
618: /* Perform HR decomposition */
619: /* Hyperbolic rotators */
620: PetscCall(DSPseudoOrthog_HR(&nv,X+off,ld,s+l,R,ldr,perm,cmplxEig,NULL,ds->work+nwus));
621: /* Sort wr,wi perm */
622: ts = ds->work+nwus;
623: PetscCall(PetscArraycpy(ts,wr+l,n));
624: for (i=0;i<n;i++) wr[i+l] = ts[perm[i]];
625: #if !defined(PETSC_USE_COMPLEX)
626: if (wi) {
627: PetscCall(PetscArraycpy(ts,wi+l,n));
628: for (i=0;i<n;i++) wi[i+l] = ts[perm[i]];
629: }
630: #endif
631: /* Projected Matrix */
632: PetscCall(DSGetArrayReal(ds,DS_MAT_T,&d));
633: PetscCall(PetscArrayzero(d+2*ld,ld));
634: e = d+ld;
635: d[l+nv-1] = PetscRealPart(wr[l+nv-1]*s[l+nv-1]);
636: for (i=0;i<nv-1;i++) {
637: if (cmplxEig[i]==0) { /* Real */
638: d[l+i] = PetscRealPart(wr[l+i]*s[l+i]);
639: e[l+i] = 0.0;
640: } else {
641: vr = PetscRealPart(wr[l+i]);
642: #if defined(PETSC_USE_COMPLEX)
643: vi = PetscImaginaryPart(wr[l+i]);
644: #else
645: vi = wi?PetscRealPart(wi[l+i]):0.0;
646: #endif
647: if (cmplxEig[i]==-1) vi = -vi;
648: tr = PetscRealPart((R[i+(i+1)*ldr]/R[i+i*ldr]))*vi;
649: d[l+i] = (vr-tr)*s[l+i];
650: d[l+i+1] = (vr+tr)*s[l+i+1];
651: e[l+i] = PetscRealPart(s[l+i]*(R[(i+1)+(i+1)*ldr]/R[i+i*ldr])*vi);
652: if (i<nv-2) e[l+i+1] = 0.0;
653: i++;
654: }
655: }
656: PetscCall(DSRestoreArrayReal(ds,DS_MAT_T,&d));
657: PetscCall(DSRestoreArrayReal(ds,DS_MAT_D,&s));
658: /* accumulate previous Q */
659: if (accum) {
660: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Q],&Q));
661: PetscCall(PetscBLASIntCast(nv,&nv_));
662: PetscCall(DSAllocateMat_Private(ds,DS_MAT_W));
663: PetscCall(MatCopy(ds->omat[DS_MAT_Q],ds->omat[DS_MAT_W],SAME_NONZERO_PATTERN));
664: PetscCall(MatDenseGetArrayRead(ds->omat[DS_MAT_W],&W));
665: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&sone,W+off,&ld_,X+off,&ld_,&szero,Q+off,&ld_));
666: PetscCall(MatDenseRestoreArrayRead(ds->omat[DS_MAT_W],&W));
667: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q));
668: }
669: ds->t = nv+l;
670: PetscCall(MatDenseRestoreArray(ds->omat[mat],&X));
671: if (!ds->compact) PetscCall(DSSwitchFormat_GHIEP(ds,PETSC_FALSE));
672: PetscFunctionReturn(PETSC_SUCCESS);
673: }
675: /*
676: Reduce to tridiagonal-diagonal pair by means of TridiagDiag_HHR.
677: */
678: PetscErrorCode DSIntermediate_GHIEP(DS ds)
679: {
680: PetscInt i,ld,off;
681: PetscInt nwall,nwallr,nwalli;
682: PetscScalar *A,*B,*Q;
683: PetscReal *d,*e,*s;
685: PetscFunctionBegin;
686: ld = ds->ld;
687: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
688: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_B],&B));
689: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Q],&Q));
690: PetscCall(DSGetArrayReal(ds,DS_MAT_T,&d));
691: PetscCall(DSGetArrayReal(ds,DS_MAT_D,&s));
692: e = d+ld;
693: off = ds->l+ds->l*ld;
694: PetscCall(PetscArrayzero(Q,ld*ld));
695: nwall = ld*ld+ld;
696: nwallr = ld;
697: nwalli = ld;
698: PetscCall(DSAllocateWork_Private(ds,nwall,nwallr,nwalli));
699: for (i=0;i<ds->n;i++) Q[i+i*ld]=1.0;
700: for (i=0;i<ds->n-ds->l;i++) *(ds->perm+i)=i;
701: if (ds->compact) {
702: if (ds->state < DS_STATE_INTERMEDIATE) {
703: PetscCall(DSSwitchFormat_GHIEP(ds,PETSC_FALSE));
704: PetscCall(TridiagDiag_HHR(ds->k-ds->l+1,A+off,ld,s+ds->l,Q+off,ld,PETSC_TRUE,d+ds->l,e+ds->l,ds->perm,ds->work,ds->rwork,ds->iwork));
705: ds->k = ds->l;
706: PetscCall(PetscArrayzero(d+2*ld+ds->l,ds->n-ds->l));
707: }
708: } else {
709: if (ds->state < DS_STATE_INTERMEDIATE) {
710: for (i=0;i<ds->n;i++) s[i] = PetscRealPart(B[i+i*ld]);
711: PetscCall(TridiagDiag_HHR(ds->n-ds->l,A+off,ld,s+ds->l,Q+off,ld,PETSC_FALSE,d+ds->l,e+ds->l,ds->perm,ds->work,ds->rwork,ds->iwork));
712: PetscCall(PetscArrayzero(d+2*ld,ds->n));
713: ds->k = ds->l;
714: PetscCall(DSSwitchFormat_GHIEP(ds,PETSC_FALSE));
715: } else PetscCall(DSSwitchFormat_GHIEP(ds,PETSC_TRUE));
716: }
717: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
718: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_B],&B));
719: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q));
720: PetscCall(DSRestoreArrayReal(ds,DS_MAT_T,&d));
721: PetscCall(DSRestoreArrayReal(ds,DS_MAT_D,&s));
722: PetscFunctionReturn(PETSC_SUCCESS);
723: }