Actual source code: dsgnhep.c
slepc-main 2024-12-17
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
11: #include <slepc/private/dsimpl.h>
12: #include <slepcblaslapack.h>
14: /*
15: 1) Patterns of A and B
16: DS_STATE_RAW: DS_STATE_INTERM/CONDENSED
17: 0 n-1 0 n-1
18: ------------- -------------
19: 0 |* * * * * *| 0 |* * * * * *|
20: |* * * * * *| | * * * * *|
21: |* * * * * *| | * * * *|
22: |* * * * * *| | * * * *|
23: |* * * * * *| | * *|
24: n-1 |* * * * * *| n-1 | *|
25: ------------- -------------
27: 2) Moreover, P and Q are assumed to be the identity in DS_STATE_INTERMEDIATE.
28: */
30: static PetscErrorCode CleanDenseSchur(PetscInt n,PetscInt k,PetscScalar *S,PetscInt ldS,PetscScalar *T,PetscInt ldT,PetscScalar *X,PetscInt ldX,PetscScalar *Y,PetscInt ldY);
32: static PetscErrorCode DSAllocate_GNHEP(DS ds,PetscInt ld)
33: {
34: PetscFunctionBegin;
35: PetscCall(DSAllocateMat_Private(ds,DS_MAT_A));
36: PetscCall(DSAllocateMat_Private(ds,DS_MAT_B));
37: PetscCall(DSAllocateMat_Private(ds,DS_MAT_Z));
38: PetscCall(DSAllocateMat_Private(ds,DS_MAT_Q));
39: PetscCall(PetscFree(ds->perm));
40: PetscCall(PetscMalloc1(ld,&ds->perm));
41: PetscFunctionReturn(PETSC_SUCCESS);
42: }
44: static PetscErrorCode DSView_GNHEP(DS ds,PetscViewer viewer)
45: {
46: PetscViewerFormat format;
48: PetscFunctionBegin;
49: PetscCall(PetscViewerGetFormat(viewer,&format));
50: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(PETSC_SUCCESS);
51: PetscCall(DSViewMat(ds,viewer,DS_MAT_A));
52: PetscCall(DSViewMat(ds,viewer,DS_MAT_B));
53: if (ds->state>DS_STATE_INTERMEDIATE) {
54: PetscCall(DSViewMat(ds,viewer,DS_MAT_Z));
55: PetscCall(DSViewMat(ds,viewer,DS_MAT_Q));
56: }
57: if (ds->omat[DS_MAT_X]) PetscCall(DSViewMat(ds,viewer,DS_MAT_X));
58: if (ds->omat[DS_MAT_Y]) PetscCall(DSViewMat(ds,viewer,DS_MAT_Y));
59: PetscFunctionReturn(PETSC_SUCCESS);
60: }
62: static PetscErrorCode DSVectors_GNHEP_Eigen_Some(DS ds,PetscInt *k,PetscReal *rnorm,PetscBool left)
63: {
64: PetscInt i;
65: PetscBLASInt n,ld,mout,info,*select,mm,inc=1,cols=1,zero=0;
66: PetscScalar *X,*Y,*XY,*Z,*Q,*A,*B,fone=1.0,fzero=0.0;
67: PetscReal norm,done=1.0;
68: PetscBool iscomplex = PETSC_FALSE;
69: const char *side;
71: PetscFunctionBegin;
72: PetscCall(PetscBLASIntCast(ds->n,&n));
73: PetscCall(PetscBLASIntCast(ds->ld,&ld));
74: if (left) {
75: X = NULL;
76: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Y],&Y));
77: side = "L";
78: } else {
79: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_X],&X));
80: Y = NULL;
81: side = "R";
82: }
83: XY = left? Y: X;
84: PetscCall(DSAllocateWork_Private(ds,0,0,ld));
85: select = ds->iwork;
86: for (i=0;i<n;i++) select[i] = (PetscBLASInt)PETSC_FALSE;
87: if (ds->state <= DS_STATE_INTERMEDIATE) {
88: PetscCall(DSSetIdentity(ds,DS_MAT_Q));
89: PetscCall(DSSetIdentity(ds,DS_MAT_Z));
90: }
91: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
92: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_B],&B));
93: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Q],&Q));
94: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Z],&Z));
95: PetscCall(CleanDenseSchur(n,0,A,ld,B,ld,Q,ld,Z,ld));
96: if (ds->state < DS_STATE_CONDENSED) PetscCall(DSSetState(ds,DS_STATE_CONDENSED));
98: /* compute k-th eigenvector */
99: select[*k] = (PetscBLASInt)PETSC_TRUE;
100: #if defined(PETSC_USE_COMPLEX)
101: mm = 1;
102: PetscCall(DSAllocateWork_Private(ds,2*ld,2*ld,0));
103: PetscCallBLAS("LAPACKtgevc",LAPACKtgevc_(side,"S",select,&n,A,&ld,B,&ld,PetscSafePointerPlusOffset(Y,(*k)*ld),&ld,PetscSafePointerPlusOffset(X,(*k)*ld),&ld,&mm,&mout,ds->work,ds->rwork,&info));
104: #else
105: if ((*k)<n-1 && (A[ld*(*k)+(*k)+1] != 0.0 || B[ld*(*k)+(*k)+1] != 0.0)) iscomplex = PETSC_TRUE;
106: mm = iscomplex? 2: 1;
107: if (iscomplex) select[(*k)+1] = (PetscBLASInt)PETSC_TRUE;
108: PetscCall(DSAllocateWork_Private(ds,6*ld,0,0));
109: PetscCallBLAS("LAPACKtgevc",LAPACKtgevc_(side,"S",select,&n,A,&ld,B,&ld,PetscSafePointerPlusOffset(Y,(*k)*ld),&ld,PetscSafePointerPlusOffset(X,(*k)*ld),&ld,&mm,&mout,ds->work,&info));
110: #endif
111: SlepcCheckLapackInfo("tgevc",info);
112: PetscCheck(select[*k] && mout==mm,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong arguments in call to Lapack xTGEVC");
113: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
114: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_B],&B));
116: /* accumulate and normalize eigenvectors */
117: PetscCall(PetscArraycpy(ds->work,XY+(*k)*ld,mm*ld));
118: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&mm,&n,&fone,left?Z:Q,&ld,ds->work,&ld,&fzero,XY+(*k)*ld,&ld));
119: norm = BLASnrm2_(&n,XY+(*k)*ld,&inc);
120: #if !defined(PETSC_USE_COMPLEX)
121: if (iscomplex) {
122: norm = SlepcAbsEigenvalue(norm,BLASnrm2_(&n,XY+(*k+1)*ld,&inc));
123: cols = 2;
124: }
125: #endif
126: PetscCallBLAS("LAPACKlascl",LAPACKlascl_("G",&zero,&zero,&norm,&done,&n,&cols,XY+(*k)*ld,&ld,&info));
127: SlepcCheckLapackInfo("lascl",info);
128: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q));
129: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_Z],&Z));
131: /* set output arguments */
132: if (rnorm) {
133: if (iscomplex) *rnorm = SlepcAbsEigenvalue(XY[n-1+(*k)*ld],XY[n-1+(*k+1)*ld]);
134: else *rnorm = PetscAbsScalar(XY[n-1+(*k)*ld]);
135: }
136: if (iscomplex) (*k)++;
137: PetscCall(MatDenseRestoreArray(ds->omat[left?DS_MAT_Y:DS_MAT_X],&XY));
138: PetscFunctionReturn(PETSC_SUCCESS);
139: }
141: static PetscErrorCode DSVectors_GNHEP_Eigen_All(DS ds,PetscBool left)
142: {
143: PetscInt i;
144: PetscBLASInt n,ld,mout,info,inc = 1;
145: PetscBool iscomplex;
146: PetscScalar *X,*Y,*XY,*Q,*Z,*A,*B,tmp;
147: PetscReal norm;
148: const char *side,*back;
150: PetscFunctionBegin;
151: PetscCall(PetscBLASIntCast(ds->n,&n));
152: PetscCall(PetscBLASIntCast(ds->ld,&ld));
153: if (left) {
154: X = NULL;
155: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Y],&Y));
156: side = "L";
157: } else {
158: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_X],&X));
159: Y = NULL;
160: side = "R";
161: }
162: XY = left? Y: X;
163: if (ds->state <= DS_STATE_INTERMEDIATE) {
164: PetscCall(DSSetIdentity(ds,DS_MAT_Q));
165: PetscCall(DSSetIdentity(ds,DS_MAT_Z));
166: }
167: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
168: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_B],&B));
169: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Q],&Q));
170: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Z],&Z));
171: PetscCall(CleanDenseSchur(n,0,A,ld,B,ld,Q,ld,Z,ld));
172: if (ds->state>=DS_STATE_CONDENSED) {
173: /* DSSolve() has been called, backtransform with matrix Q */
174: back = "B";
175: PetscCall(PetscArraycpy(left?Y:X,left?Z:Q,ld*ld));
176: } else {
177: back = "A";
178: PetscCall(DSSetState(ds,DS_STATE_CONDENSED));
179: }
180: #if defined(PETSC_USE_COMPLEX)
181: PetscCall(DSAllocateWork_Private(ds,2*ld,2*ld,0));
182: PetscCallBLAS("LAPACKtgevc",LAPACKtgevc_(side,back,NULL,&n,A,&ld,B,&ld,Y,&ld,X,&ld,&n,&mout,ds->work,ds->rwork,&info));
183: #else
184: PetscCall(DSAllocateWork_Private(ds,6*ld,0,0));
185: PetscCallBLAS("LAPACKtgevc",LAPACKtgevc_(side,back,NULL,&n,A,&ld,B,&ld,Y,&ld,X,&ld,&n,&mout,ds->work,&info));
186: #endif
187: SlepcCheckLapackInfo("tgevc",info);
188: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q));
189: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_Z],&Z));
191: /* normalize eigenvectors */
192: for (i=0;i<n;i++) {
193: iscomplex = (i<n-1 && (A[i+1+i*ld]!=0.0 || B[i+1+i*ld]!=0.0))? PETSC_TRUE: PETSC_FALSE;
194: norm = BLASnrm2_(&n,XY+i*ld,&inc);
195: #if !defined(PETSC_USE_COMPLEX)
196: if (iscomplex) {
197: tmp = BLASnrm2_(&n,XY+(i+1)*ld,&inc);
198: norm = SlepcAbsEigenvalue(norm,tmp);
199: }
200: #endif
201: tmp = 1.0 / norm;
202: PetscCallBLAS("BLASscal",BLASscal_(&n,&tmp,XY+i*ld,&inc));
203: #if !defined(PETSC_USE_COMPLEX)
204: if (iscomplex) PetscCallBLAS("BLASscal",BLASscal_(&n,&tmp,XY+(i+1)*ld,&inc));
205: #endif
206: if (iscomplex) i++;
207: }
208: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
209: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_B],&B));
210: PetscCall(MatDenseRestoreArray(ds->omat[left?DS_MAT_Y:DS_MAT_X],&XY));
211: PetscFunctionReturn(PETSC_SUCCESS);
212: }
214: static PetscErrorCode DSVectors_GNHEP(DS ds,DSMatType mat,PetscInt *k,PetscReal *rnorm)
215: {
216: PetscFunctionBegin;
217: switch (mat) {
218: case DS_MAT_X:
219: case DS_MAT_Y:
220: if (k) PetscCall(DSVectors_GNHEP_Eigen_Some(ds,k,rnorm,mat == DS_MAT_Y?PETSC_TRUE:PETSC_FALSE));
221: else PetscCall(DSVectors_GNHEP_Eigen_All(ds,mat == DS_MAT_Y?PETSC_TRUE:PETSC_FALSE));
222: break;
223: default:
224: SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
225: }
226: PetscFunctionReturn(PETSC_SUCCESS);
227: }
229: static PetscErrorCode DSSort_GNHEP_Arbitrary(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
230: {
231: PetscInt i;
232: PetscBLASInt info,n,ld,mout,lwork,liwork,*iwork,*selection,zero_=0,true_=1;
233: PetscScalar *S,*T,*Q,*Z,*work,*beta;
235: PetscFunctionBegin;
236: if (!ds->sc) PetscFunctionReturn(PETSC_SUCCESS);
237: PetscCall(PetscBLASIntCast(ds->n,&n));
238: PetscCall(PetscBLASIntCast(ds->ld,&ld));
239: #if !defined(PETSC_USE_COMPLEX)
240: lwork = 4*n+16;
241: #else
242: lwork = 1;
243: #endif
244: liwork = 1;
245: PetscCall(DSAllocateWork_Private(ds,lwork+2*n,0,liwork+n));
246: beta = ds->work;
247: work = ds->work + n;
248: PetscCall(PetscBLASIntCast(ds->lwork-n,&lwork));
249: selection = ds->iwork;
250: iwork = ds->iwork + n;
251: PetscCall(PetscBLASIntCast(ds->liwork-n,&liwork));
252: /* Compute the selected eigenvalue to be in the leading position */
253: PetscCall(DSSortEigenvalues_Private(ds,rr,ri,ds->perm,PETSC_FALSE));
254: PetscCall(PetscArrayzero(selection,n));
255: for (i=0; i<*k; i++) selection[ds->perm[i]] = 1;
256: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&S));
257: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_B],&T));
258: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Q],&Q));
259: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Z],&Z));
260: #if !defined(PETSC_USE_COMPLEX)
261: PetscCallBLAS("LAPACKtgsen",LAPACKtgsen_(&zero_,&true_,&true_,selection,&n,S,&ld,T,&ld,wr,wi,beta,Z,&ld,Q,&ld,&mout,NULL,NULL,NULL,work,&lwork,iwork,&liwork,&info));
262: #else
263: PetscCallBLAS("LAPACKtgsen",LAPACKtgsen_(&zero_,&true_,&true_,selection,&n,S,&ld,T,&ld,wr,beta,Z,&ld,Q,&ld,&mout,NULL,NULL,NULL,work,&lwork,iwork,&liwork,&info));
264: #endif
265: SlepcCheckLapackInfo("tgsen",info);
266: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&S));
267: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_B],&T));
268: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q));
269: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_Z],&Z));
270: *k = mout;
271: for (i=0;i<n;i++) {
272: if (beta[i]==0.0) wr[i] = (PetscRealPart(wr[i])>0.0)? PETSC_MAX_REAL: PETSC_MIN_REAL;
273: else wr[i] /= beta[i];
274: #if !defined(PETSC_USE_COMPLEX)
275: if (beta[i]==0.0) wi[i] = (wi[i]>0.0)? PETSC_MAX_REAL: PETSC_MIN_REAL;
276: else wi[i] /= beta[i];
277: #endif
278: }
279: PetscFunctionReturn(PETSC_SUCCESS);
280: }
282: static PetscErrorCode DSSort_GNHEP_Total(DS ds,PetscScalar *wr,PetscScalar *wi)
283: {
284: PetscScalar re;
285: PetscInt i,j,pos,result;
286: PetscBLASInt ifst,ilst,info,n,ld,one=1;
287: PetscScalar *S,*T,*Z,*Q;
288: #if !defined(PETSC_USE_COMPLEX)
289: PetscBLASInt lwork;
290: PetscScalar *work,a,safmin,scale1,scale2,im;
291: #endif
293: PetscFunctionBegin;
294: if (!ds->sc) PetscFunctionReturn(PETSC_SUCCESS);
295: PetscCall(PetscBLASIntCast(ds->n,&n));
296: PetscCall(PetscBLASIntCast(ds->ld,&ld));
297: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&S));
298: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_B],&T));
299: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Q],&Q));
300: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Z],&Z));
301: #if !defined(PETSC_USE_COMPLEX)
302: lwork = -1;
303: PetscCallBLAS("LAPACKtgexc",LAPACKtgexc_(&one,&one,&ld,NULL,&ld,NULL,&ld,NULL,&ld,NULL,&ld,&one,&one,&a,&lwork,&info));
304: SlepcCheckLapackInfo("tgexc",info);
305: safmin = LAPACKlamch_("S");
306: PetscCall(PetscBLASIntCast((PetscInt)a,&lwork));
307: PetscCall(DSAllocateWork_Private(ds,lwork,0,0));
308: work = ds->work;
309: #endif
310: /* selection sort */
311: for (i=ds->l;i<n-1;i++) {
312: re = wr[i];
313: #if !defined(PETSC_USE_COMPLEX)
314: im = wi[i];
315: #endif
316: pos = 0;
317: j = i+1; /* j points to the next eigenvalue */
318: #if !defined(PETSC_USE_COMPLEX)
319: if (im != 0) j=i+2;
320: #endif
321: /* find minimum eigenvalue */
322: for (;j<n;j++) {
323: #if !defined(PETSC_USE_COMPLEX)
324: PetscCall(SlepcSCCompare(ds->sc,re,im,wr[j],wi[j],&result));
325: #else
326: PetscCall(SlepcSCCompare(ds->sc,re,0.0,wr[j],0.0,&result));
327: #endif
328: if (result > 0) {
329: re = wr[j];
330: #if !defined(PETSC_USE_COMPLEX)
331: im = wi[j];
332: #endif
333: pos = j;
334: }
335: #if !defined(PETSC_USE_COMPLEX)
336: if (wi[j] != 0) j++;
337: #endif
338: }
339: if (pos) {
340: /* interchange blocks */
341: PetscCall(PetscBLASIntCast(pos+1,&ifst));
342: PetscCall(PetscBLASIntCast(i+1,&ilst));
343: #if !defined(PETSC_USE_COMPLEX)
344: PetscCallBLAS("LAPACKtgexc",LAPACKtgexc_(&one,&one,&n,S,&ld,T,&ld,Z,&ld,Q,&ld,&ifst,&ilst,work,&lwork,&info));
345: #else
346: PetscCallBLAS("LAPACKtgexc",LAPACKtgexc_(&one,&one,&n,S,&ld,T,&ld,Z,&ld,Q,&ld,&ifst,&ilst,&info));
347: #endif
348: SlepcCheckLapackInfo("tgexc",info);
349: /* recover original eigenvalues from T and S matrices */
350: for (j=i;j<n;j++) {
351: #if !defined(PETSC_USE_COMPLEX)
352: if (j<n-1 && S[j*ld+j+1] != 0.0) {
353: /* complex conjugate eigenvalue */
354: PetscCallBLAS("LAPACKlag2",LAPACKlag2_(S+j*ld+j,&ld,T+j*ld+j,&ld,&safmin,&scale1,&scale2,&re,&a,&im));
355: wr[j] = re / scale1;
356: wi[j] = im / scale1;
357: wr[j+1] = a / scale2;
358: wi[j+1] = -wi[j];
359: j++;
360: } else
361: #endif
362: {
363: if (T[j*ld+j] == 0.0) wr[j] = (PetscRealPart(S[j*ld+j])>0.0)? PETSC_MAX_REAL: PETSC_MIN_REAL;
364: else wr[j] = S[j*ld+j] / T[j*ld+j];
365: #if !defined(PETSC_USE_COMPLEX)
366: wi[j] = 0.0;
367: #endif
368: }
369: }
370: }
371: #if !defined(PETSC_USE_COMPLEX)
372: if (wi[i] != 0.0) i++;
373: #endif
374: }
375: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&S));
376: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_B],&T));
377: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q));
378: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_Z],&Z));
379: PetscFunctionReturn(PETSC_SUCCESS);
380: }
382: static PetscErrorCode DSSort_GNHEP(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
383: {
384: PetscFunctionBegin;
385: if (!rr || wr == rr) PetscCall(DSSort_GNHEP_Total(ds,wr,wi));
386: else PetscCall(DSSort_GNHEP_Arbitrary(ds,wr,wi,rr,ri,k));
387: PetscFunctionReturn(PETSC_SUCCESS);
388: }
390: static PetscErrorCode DSUpdateExtraRow_GNHEP(DS ds)
391: {
392: PetscInt i;
393: PetscBLASInt n,ld,incx=1;
394: PetscScalar *A,*B,*x,*y,one=1.0,zero=0.0;
395: const PetscScalar *Q;
397: PetscFunctionBegin;
398: PetscCall(PetscBLASIntCast(ds->n,&n));
399: PetscCall(PetscBLASIntCast(ds->ld,&ld));
400: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
401: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_B],&B));
402: PetscCall(MatDenseGetArrayRead(ds->omat[DS_MAT_Q],&Q));
403: PetscCall(DSAllocateWork_Private(ds,2*ld,0,0));
404: x = ds->work;
405: y = ds->work+ld;
406: for (i=0;i<n;i++) x[i] = PetscConj(A[n+i*ld]);
407: PetscCallBLAS("BLASgemv",BLASgemv_("C",&n,&n,&one,Q,&ld,x,&incx,&zero,y,&incx));
408: for (i=0;i<n;i++) A[n+i*ld] = PetscConj(y[i]);
409: for (i=0;i<n;i++) x[i] = PetscConj(B[n+i*ld]);
410: PetscCallBLAS("BLASgemv",BLASgemv_("C",&n,&n,&one,Q,&ld,x,&incx,&zero,y,&incx));
411: for (i=0;i<n;i++) B[n+i*ld] = PetscConj(y[i]);
412: ds->k = n;
413: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
414: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_B],&B));
415: PetscCall(MatDenseRestoreArrayRead(ds->omat[DS_MAT_Q],&Q));
416: PetscFunctionReturn(PETSC_SUCCESS);
417: }
419: /*
420: Write zeros from the column k to n in the lower triangular part of the
421: matrices S and T, and inside 2-by-2 diagonal blocks of T in order to
422: make (S,T) a valid Schur decompositon.
423: */
424: static PetscErrorCode CleanDenseSchur(PetscInt n,PetscInt k,PetscScalar *S,PetscInt ldS,PetscScalar *T,PetscInt ldT,PetscScalar *X,PetscInt ldX,PetscScalar *Y,PetscInt ldY)
425: {
426: PetscInt i;
427: #if defined(PETSC_USE_COMPLEX)
428: PetscInt j;
429: PetscScalar s;
430: #else
431: PetscBLASInt ldS_,ldT_,n_i,n_i_2,one=1,n_,i_2,i_;
432: PetscScalar b11,b22,sr,cr,sl,cl;
433: #endif
435: PetscFunctionBegin;
436: #if defined(PETSC_USE_COMPLEX)
437: for (i=k; i<n; i++) {
438: /* Some functions need the diagonal elements in T be real */
439: if (T && PetscImaginaryPart(T[ldT*i+i]) != 0.0) {
440: s = PetscConj(T[ldT*i+i])/PetscAbsScalar(T[ldT*i+i]);
441: for (j=0;j<=i;j++) {
442: T[ldT*i+j] *= s;
443: S[ldS*i+j] *= s;
444: }
445: T[ldT*i+i] = PetscRealPart(T[ldT*i+i]);
446: if (X) for (j=0;j<n;j++) X[ldX*i+j] *= s;
447: }
448: j = i+1;
449: if (j<n) {
450: S[ldS*i+j] = 0.0;
451: if (T) T[ldT*i+j] = 0.0;
452: }
453: }
454: #else
455: PetscCall(PetscBLASIntCast(ldS,&ldS_));
456: PetscCall(PetscBLASIntCast(ldT,&ldT_));
457: PetscCall(PetscBLASIntCast(n,&n_));
458: for (i=k;i<n-1;i++) {
459: if (S[ldS*i+i+1] != 0.0) {
460: /* Check if T(i+1,i) and T(i,i+1) are zero */
461: if (T[ldT*(i+1)+i] != 0.0 || T[ldT*i+i+1] != 0.0) {
462: /* Check if T(i+1,i) and T(i,i+1) are negligible */
463: if (PetscAbs(T[ldT*(i+1)+i])+PetscAbs(T[ldT*i+i+1]) < (PetscAbs(T[ldT*i+i])+PetscAbs(T[ldT*(i+1)+i+1]))*PETSC_MACHINE_EPSILON) {
464: T[ldT*i+i+1] = 0.0;
465: T[ldT*(i+1)+i] = 0.0;
466: } else {
467: /* If one of T(i+1,i) or T(i,i+1) is negligible, we make zero the other element */
468: if (PetscAbs(T[ldT*i+i+1]) < (PetscAbs(T[ldT*i+i])+PetscAbs(T[ldT*(i+1)+i+1])+PetscAbs(T[ldT*(i+1)+i]))*PETSC_MACHINE_EPSILON) {
469: PetscCallBLAS("LAPACKlasv2",LAPACKlasv2_(&T[ldT*i+i],&T[ldT*(i+1)+i],&T[ldT*(i+1)+i+1],&b22,&b11,&sl,&cl,&sr,&cr));
470: } else if (PetscAbs(T[ldT*(i+1)+i]) < (PetscAbs(T[ldT*i+i])+PetscAbs(T[ldT*(i+1)+i+1])+PetscAbs(T[ldT*i+i+1]))*PETSC_MACHINE_EPSILON) {
471: PetscCallBLAS("LAPACKlasv2",LAPACKlasv2_(&T[ldT*i+i],&T[ldT*i+i+1],&T[ldT*(i+1)+i+1],&b22,&b11,&sr,&cr,&sl,&cl));
472: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported format. Call DSSolve before this function");
473: PetscCall(PetscBLASIntCast(n-i,&n_i));
474: n_i_2 = n_i - 2;
475: PetscCall(PetscBLASIntCast(i+2,&i_2));
476: PetscCall(PetscBLASIntCast(i,&i_));
477: if (b11 < 0.0) {
478: cr = -cr; sr = -sr;
479: b11 = -b11; b22 = -b22;
480: }
481: PetscCallBLAS("BLASrot",BLASrot_(&n_i,&S[ldS*i+i],&ldS_,&S[ldS*i+i+1],&ldS_,&cl,&sl));
482: PetscCallBLAS("BLASrot",BLASrot_(&i_2,&S[ldS*i],&one,&S[ldS*(i+1)],&one,&cr,&sr));
483: PetscCallBLAS("BLASrot",BLASrot_(&n_i_2,&T[ldT*(i+2)+i],&ldT_,&T[ldT*(i+2)+i+1],&ldT_,&cl,&sl));
484: PetscCallBLAS("BLASrot",BLASrot_(&i_,&T[ldT*i],&one,&T[ldT*(i+1)],&one,&cr,&sr));
485: if (X) PetscCallBLAS("BLASrot",BLASrot_(&n_,&X[ldX*i],&one,&X[ldX*(i+1)],&one,&cr,&sr));
486: if (Y) PetscCallBLAS("BLASrot",BLASrot_(&n_,&Y[ldY*i],&one,&Y[ldY*(i+1)],&one,&cl,&sl));
487: T[ldT*i+i] = b11; T[ldT*i+i+1] = 0.0;
488: T[ldT*(i+1)+i] = 0.0; T[ldT*(i+1)+i+1] = b22;
489: }
490: }
491: i++;
492: }
493: }
494: #endif
495: PetscFunctionReturn(PETSC_SUCCESS);
496: }
498: static PetscErrorCode DSSolve_GNHEP(DS ds,PetscScalar *wr,PetscScalar *wi)
499: {
500: PetscScalar *work,*beta,a;
501: PetscInt i;
502: PetscBLASInt lwork,info,n,ld,iaux;
503: PetscScalar *A,*B,*Z,*Q;
504: PetscBool usegges3=(ds->method==1)?PETSC_TRUE:PETSC_FALSE;
506: PetscFunctionBegin;
507: #if !defined(PETSC_USE_COMPLEX)
508: PetscAssertPointer(wi,3);
509: #endif
510: PetscCall(PetscBLASIntCast(ds->n,&n));
511: PetscCall(PetscBLASIntCast(ds->ld,&ld));
512: lwork = -1;
513: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
514: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_B],&B));
515: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Q],&Q));
516: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Z],&Z));
517: #if !defined(PETSC_USE_COMPLEX)
518: if (usegges3) PetscCallBLAS("LAPACKgges3",LAPACKgges3_("V","V","N",NULL,&n,A,&ld,B,&ld,&iaux,wr,wi,NULL,Z,&ld,Q,&ld,&a,&lwork,NULL,&info));
519: else PetscCallBLAS("LAPACKgges",LAPACKgges_("V","V","N",NULL,&n,A,&ld,B,&ld,&iaux,wr,wi,NULL,Z,&ld,Q,&ld,&a,&lwork,NULL,&info));
520: PetscCall(PetscBLASIntCast((PetscInt)a,&lwork));
521: PetscCall(DSAllocateWork_Private(ds,lwork+ld,0,0));
522: beta = ds->work;
523: work = beta+ds->n;
524: PetscCall(PetscBLASIntCast(ds->lwork-ds->n,&lwork));
525: if (usegges3) PetscCallBLAS("LAPACKgges3",LAPACKgges3_("V","V","N",NULL,&n,A,&ld,B,&ld,&iaux,wr,wi,beta,Z,&ld,Q,&ld,work,&lwork,NULL,&info));
526: else PetscCallBLAS("LAPACKgges",LAPACKgges_("V","V","N",NULL,&n,A,&ld,B,&ld,&iaux,wr,wi,beta,Z,&ld,Q,&ld,work,&lwork,NULL,&info));
527: #else
528: if (usegges3) PetscCallBLAS("LAPACKgges3",LAPACKgges3_("V","V","N",NULL,&n,A,&ld,B,&ld,&iaux,wr,NULL,Z,&ld,Q,&ld,&a,&lwork,NULL,NULL,&info));
529: else PetscCallBLAS("LAPACKgges",LAPACKgges_("V","V","N",NULL,&n,A,&ld,B,&ld,&iaux,wr,NULL,Z,&ld,Q,&ld,&a,&lwork,NULL,NULL,&info));
530: PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(a),&lwork));
531: PetscCall(DSAllocateWork_Private(ds,lwork+ld,8*ld,0));
532: beta = ds->work;
533: work = beta+ds->n;
534: PetscCall(PetscBLASIntCast(ds->lwork-ds->n,&lwork));
535: if (usegges3) PetscCallBLAS("LAPACKgges3",LAPACKgges3_("V","V","N",NULL,&n,A,&ld,B,&ld,&iaux,wr,beta,Z,&ld,Q,&ld,work,&lwork,ds->rwork,NULL,&info));
536: else PetscCallBLAS("LAPACKgges",LAPACKgges_("V","V","N",NULL,&n,A,&ld,B,&ld,&iaux,wr,beta,Z,&ld,Q,&ld,work,&lwork,ds->rwork,NULL,&info));
537: #endif
538: SlepcCheckLapackInfo(usegges3?"gges3":"gges",info);
539: for (i=0;i<n;i++) {
540: if (beta[i]==0.0) wr[i] = (PetscRealPart(wr[i])>0.0)? PETSC_MAX_REAL: PETSC_MIN_REAL;
541: else wr[i] /= beta[i];
542: #if !defined(PETSC_USE_COMPLEX)
543: if (beta[i]==0.0) wi[i] = (wi[i]>0.0)? PETSC_MAX_REAL: PETSC_MIN_REAL;
544: else wi[i] /= beta[i];
545: #else
546: if (wi) wi[i] = 0.0;
547: #endif
548: }
549: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
550: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_B],&B));
551: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q));
552: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_Z],&Z));
553: PetscFunctionReturn(PETSC_SUCCESS);
554: }
556: #if !defined(PETSC_HAVE_MPIUNI)
557: static PetscErrorCode DSSynchronize_GNHEP(DS ds,PetscScalar eigr[],PetscScalar eigi[])
558: {
559: PetscInt ld=ds->ld,l=ds->l,k;
560: PetscMPIInt n,rank,off=0,size,ldn;
561: PetscScalar *A,*B,*Q,*Z;
563: PetscFunctionBegin;
564: k = 2*(ds->n-l)*ld;
565: if (ds->state>DS_STATE_RAW) k += 2*(ds->n-l)*ld;
566: if (eigr) k += (ds->n-l);
567: if (eigi) k += (ds->n-l);
568: PetscCall(DSAllocateWork_Private(ds,k,0,0));
569: PetscCall(PetscMPIIntCast(k*sizeof(PetscScalar),&size));
570: PetscCall(PetscMPIIntCast(ds->n-l,&n));
571: PetscCall(PetscMPIIntCast(ld*(ds->n-l),&ldn));
572: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
573: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_B],&B));
574: if (ds->state>DS_STATE_RAW) {
575: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Q],&Q));
576: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Z],&Z));
577: }
578: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)ds),&rank));
579: if (!rank) {
580: PetscCallMPI(MPI_Pack(A+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds)));
581: PetscCallMPI(MPI_Pack(B+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds)));
582: if (ds->state>DS_STATE_RAW) {
583: PetscCallMPI(MPI_Pack(Q+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds)));
584: PetscCallMPI(MPI_Pack(Z+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds)));
585: }
586: if (eigr) PetscCallMPI(MPI_Pack(eigr+l,n,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds)));
587: #if !defined(PETSC_USE_COMPLEX)
588: if (eigi) PetscCallMPI(MPI_Pack(eigi+l,n,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds)));
589: #endif
590: }
591: PetscCallMPI(MPI_Bcast(ds->work,size,MPI_BYTE,0,PetscObjectComm((PetscObject)ds)));
592: if (rank) {
593: PetscCallMPI(MPI_Unpack(ds->work,size,&off,A+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds)));
594: PetscCallMPI(MPI_Unpack(ds->work,size,&off,B+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds)));
595: if (ds->state>DS_STATE_RAW) {
596: PetscCallMPI(MPI_Unpack(ds->work,size,&off,Q+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds)));
597: PetscCallMPI(MPI_Unpack(ds->work,size,&off,Z+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds)));
598: }
599: if (eigr) PetscCallMPI(MPI_Unpack(ds->work,size,&off,eigr+l,n,MPIU_SCALAR,PetscObjectComm((PetscObject)ds)));
600: #if !defined(PETSC_USE_COMPLEX)
601: if (eigi) PetscCallMPI(MPI_Unpack(ds->work,size,&off,eigi+l,n,MPIU_SCALAR,PetscObjectComm((PetscObject)ds)));
602: #endif
603: }
604: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
605: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_B],&B));
606: if (ds->state>DS_STATE_RAW) {
607: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q));
608: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_Z],&Z));
609: }
610: PetscFunctionReturn(PETSC_SUCCESS);
611: }
612: #endif
614: static PetscErrorCode DSTruncate_GNHEP(DS ds,PetscInt n,PetscBool trim)
615: {
616: PetscInt i,ld=ds->ld,l=ds->l;
617: PetscScalar *A,*B;
619: PetscFunctionBegin;
620: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
621: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_B],&B));
622: #if defined(PETSC_USE_DEBUG)
623: /* make sure diagonal 2x2 block is not broken */
624: PetscCheck(ds->state<DS_STATE_CONDENSED || n==0 || n==ds->n || (A[n+(n-1)*ld]==0.0 && B[n+(n-1)*ld]==0.0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"The given size would break a 2x2 block, call DSGetTruncateSize() first");
625: #endif
626: if (trim) {
627: if (ds->extrarow) { /* clean extra row */
628: for (i=l;i<ds->n;i++) A[ds->n+i*ld] = 0.0;
629: for (i=l;i<ds->n;i++) B[ds->n+i*ld] = 0.0;
630: }
631: ds->l = 0;
632: ds->k = 0;
633: ds->n = n;
634: ds->t = ds->n; /* truncated length equal to the new dimension */
635: } else {
636: if (ds->extrarow && ds->k==ds->n) {
637: /* copy entries of extra row to the new position, then clean last row */
638: for (i=l;i<n;i++) A[n+i*ld] = A[ds->n+i*ld];
639: for (i=l;i<ds->n;i++) A[ds->n+i*ld] = 0.0;
640: for (i=l;i<n;i++) B[n+i*ld] = B[ds->n+i*ld];
641: for (i=l;i<ds->n;i++) B[ds->n+i*ld] = 0.0;
642: }
643: ds->k = (ds->extrarow)? n: 0;
644: ds->t = ds->n; /* truncated length equal to previous dimension */
645: ds->n = n;
646: }
647: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
648: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_B],&B));
649: PetscFunctionReturn(PETSC_SUCCESS);
650: }
652: /*MC
653: DSGNHEP - Dense Generalized Non-Hermitian Eigenvalue Problem.
655: Level: beginner
657: Notes:
658: The problem is expressed as A*X = B*X*Lambda, where (A,B) is the input
659: matrix pencil. Lambda is a diagonal matrix whose diagonal elements are the
660: arguments of DSSolve(). After solve, (A,B) is overwritten with the
661: generalized (real) Schur form (S,T) = (Z'*A*Q,Z'*B*Q), with the first
662: matrix being upper quasi-triangular and the second one triangular.
664: Used DS matrices:
665: + DS_MAT_A - first problem matrix
666: . DS_MAT_B - second problem matrix
667: . DS_MAT_Q - first orthogonal/unitary transformation that reduces to
668: generalized (real) Schur form
669: - DS_MAT_Z - second orthogonal/unitary transformation that reduces to
670: generalized (real) Schur form
672: Implemented methods:
673: + 0 - QZ iteration (_gges)
674: - 1 - blocked QZ iteration (_gges3, if available)
676: .seealso: DSCreate(), DSSetType(), DSType
677: M*/
678: SLEPC_EXTERN PetscErrorCode DSCreate_GNHEP(DS ds)
679: {
680: PetscFunctionBegin;
681: ds->ops->allocate = DSAllocate_GNHEP;
682: ds->ops->view = DSView_GNHEP;
683: ds->ops->vectors = DSVectors_GNHEP;
684: ds->ops->solve[0] = DSSolve_GNHEP;
685: #if !defined(SLEPC_MISSING_LAPACK_GGES3)
686: ds->ops->solve[1] = DSSolve_GNHEP;
687: #endif
688: ds->ops->sort = DSSort_GNHEP;
689: #if !defined(PETSC_HAVE_MPIUNI)
690: ds->ops->synchronize = DSSynchronize_GNHEP;
691: #endif
692: ds->ops->gettruncatesize = DSGetTruncateSize_Default;
693: ds->ops->truncate = DSTruncate_GNHEP;
694: ds->ops->update = DSUpdateExtraRow_GNHEP;
695: PetscFunctionReturn(PETSC_SUCCESS);
696: }