Actual source code: dsgnhep.c

slepc-main 2024-12-17
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */

 11: #include <slepc/private/dsimpl.h>
 12: #include <slepcblaslapack.h>

 14: /*
 15:   1) Patterns of A and B
 16:       DS_STATE_RAW:       DS_STATE_INTERM/CONDENSED
 17:        0       n-1              0       n-1
 18:       -------------            -------------
 19:     0 |* * * * * *|          0 |* * * * * *|
 20:       |* * * * * *|            |  * * * * *|
 21:       |* * * * * *|            |    * * * *|
 22:       |* * * * * *|            |    * * * *|
 23:       |* * * * * *|            |        * *|
 24:   n-1 |* * * * * *|        n-1 |          *|
 25:       -------------            -------------

 27:   2) Moreover, P and Q are assumed to be the identity in DS_STATE_INTERMEDIATE.
 28: */

 30: static PetscErrorCode CleanDenseSchur(PetscInt n,PetscInt k,PetscScalar *S,PetscInt ldS,PetscScalar *T,PetscInt ldT,PetscScalar *X,PetscInt ldX,PetscScalar *Y,PetscInt ldY);

 32: static PetscErrorCode DSAllocate_GNHEP(DS ds,PetscInt ld)
 33: {
 34:   PetscFunctionBegin;
 35:   PetscCall(DSAllocateMat_Private(ds,DS_MAT_A));
 36:   PetscCall(DSAllocateMat_Private(ds,DS_MAT_B));
 37:   PetscCall(DSAllocateMat_Private(ds,DS_MAT_Z));
 38:   PetscCall(DSAllocateMat_Private(ds,DS_MAT_Q));
 39:   PetscCall(PetscFree(ds->perm));
 40:   PetscCall(PetscMalloc1(ld,&ds->perm));
 41:   PetscFunctionReturn(PETSC_SUCCESS);
 42: }

 44: static PetscErrorCode DSView_GNHEP(DS ds,PetscViewer viewer)
 45: {
 46:   PetscViewerFormat format;

 48:   PetscFunctionBegin;
 49:   PetscCall(PetscViewerGetFormat(viewer,&format));
 50:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(PETSC_SUCCESS);
 51:   PetscCall(DSViewMat(ds,viewer,DS_MAT_A));
 52:   PetscCall(DSViewMat(ds,viewer,DS_MAT_B));
 53:   if (ds->state>DS_STATE_INTERMEDIATE) {
 54:     PetscCall(DSViewMat(ds,viewer,DS_MAT_Z));
 55:     PetscCall(DSViewMat(ds,viewer,DS_MAT_Q));
 56:   }
 57:   if (ds->omat[DS_MAT_X]) PetscCall(DSViewMat(ds,viewer,DS_MAT_X));
 58:   if (ds->omat[DS_MAT_Y]) PetscCall(DSViewMat(ds,viewer,DS_MAT_Y));
 59:   PetscFunctionReturn(PETSC_SUCCESS);
 60: }

 62: static PetscErrorCode DSVectors_GNHEP_Eigen_Some(DS ds,PetscInt *k,PetscReal *rnorm,PetscBool left)
 63: {
 64:   PetscInt       i;
 65:   PetscBLASInt   n,ld,mout,info,*select,mm,inc=1,cols=1,zero=0;
 66:   PetscScalar    *X,*Y,*XY,*Z,*Q,*A,*B,fone=1.0,fzero=0.0;
 67:   PetscReal      norm,done=1.0;
 68:   PetscBool      iscomplex = PETSC_FALSE;
 69:   const char     *side;

 71:   PetscFunctionBegin;
 72:   PetscCall(PetscBLASIntCast(ds->n,&n));
 73:   PetscCall(PetscBLASIntCast(ds->ld,&ld));
 74:   if (left) {
 75:     X = NULL;
 76:     PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Y],&Y));
 77:     side = "L";
 78:   } else {
 79:     PetscCall(MatDenseGetArray(ds->omat[DS_MAT_X],&X));
 80:     Y = NULL;
 81:     side = "R";
 82:   }
 83:   XY = left? Y: X;
 84:   PetscCall(DSAllocateWork_Private(ds,0,0,ld));
 85:   select = ds->iwork;
 86:   for (i=0;i<n;i++) select[i] = (PetscBLASInt)PETSC_FALSE;
 87:   if (ds->state <= DS_STATE_INTERMEDIATE) {
 88:     PetscCall(DSSetIdentity(ds,DS_MAT_Q));
 89:     PetscCall(DSSetIdentity(ds,DS_MAT_Z));
 90:   }
 91:   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
 92:   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_B],&B));
 93:   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Q],&Q));
 94:   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Z],&Z));
 95:   PetscCall(CleanDenseSchur(n,0,A,ld,B,ld,Q,ld,Z,ld));
 96:   if (ds->state < DS_STATE_CONDENSED) PetscCall(DSSetState(ds,DS_STATE_CONDENSED));

 98:   /* compute k-th eigenvector */
 99:   select[*k] = (PetscBLASInt)PETSC_TRUE;
100: #if defined(PETSC_USE_COMPLEX)
101:   mm = 1;
102:   PetscCall(DSAllocateWork_Private(ds,2*ld,2*ld,0));
103:   PetscCallBLAS("LAPACKtgevc",LAPACKtgevc_(side,"S",select,&n,A,&ld,B,&ld,PetscSafePointerPlusOffset(Y,(*k)*ld),&ld,PetscSafePointerPlusOffset(X,(*k)*ld),&ld,&mm,&mout,ds->work,ds->rwork,&info));
104: #else
105:   if ((*k)<n-1 && (A[ld*(*k)+(*k)+1] != 0.0 || B[ld*(*k)+(*k)+1] != 0.0)) iscomplex = PETSC_TRUE;
106:   mm = iscomplex? 2: 1;
107:   if (iscomplex) select[(*k)+1] = (PetscBLASInt)PETSC_TRUE;
108:   PetscCall(DSAllocateWork_Private(ds,6*ld,0,0));
109:   PetscCallBLAS("LAPACKtgevc",LAPACKtgevc_(side,"S",select,&n,A,&ld,B,&ld,PetscSafePointerPlusOffset(Y,(*k)*ld),&ld,PetscSafePointerPlusOffset(X,(*k)*ld),&ld,&mm,&mout,ds->work,&info));
110: #endif
111:   SlepcCheckLapackInfo("tgevc",info);
112:   PetscCheck(select[*k] && mout==mm,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong arguments in call to Lapack xTGEVC");
113:   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
114:   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_B],&B));

116:   /* accumulate and normalize eigenvectors */
117:   PetscCall(PetscArraycpy(ds->work,XY+(*k)*ld,mm*ld));
118:   PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&mm,&n,&fone,left?Z:Q,&ld,ds->work,&ld,&fzero,XY+(*k)*ld,&ld));
119:   norm = BLASnrm2_(&n,XY+(*k)*ld,&inc);
120: #if !defined(PETSC_USE_COMPLEX)
121:   if (iscomplex) {
122:     norm = SlepcAbsEigenvalue(norm,BLASnrm2_(&n,XY+(*k+1)*ld,&inc));
123:     cols = 2;
124:   }
125: #endif
126:   PetscCallBLAS("LAPACKlascl",LAPACKlascl_("G",&zero,&zero,&norm,&done,&n,&cols,XY+(*k)*ld,&ld,&info));
127:   SlepcCheckLapackInfo("lascl",info);
128:   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q));
129:   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_Z],&Z));

131:   /* set output arguments */
132:   if (rnorm) {
133:     if (iscomplex) *rnorm = SlepcAbsEigenvalue(XY[n-1+(*k)*ld],XY[n-1+(*k+1)*ld]);
134:     else *rnorm = PetscAbsScalar(XY[n-1+(*k)*ld]);
135:   }
136:   if (iscomplex) (*k)++;
137:   PetscCall(MatDenseRestoreArray(ds->omat[left?DS_MAT_Y:DS_MAT_X],&XY));
138:   PetscFunctionReturn(PETSC_SUCCESS);
139: }

141: static PetscErrorCode DSVectors_GNHEP_Eigen_All(DS ds,PetscBool left)
142: {
143:   PetscInt       i;
144:   PetscBLASInt   n,ld,mout,info,inc = 1;
145:   PetscBool      iscomplex;
146:   PetscScalar    *X,*Y,*XY,*Q,*Z,*A,*B,tmp;
147:   PetscReal      norm;
148:   const char     *side,*back;

150:   PetscFunctionBegin;
151:   PetscCall(PetscBLASIntCast(ds->n,&n));
152:   PetscCall(PetscBLASIntCast(ds->ld,&ld));
153:   if (left) {
154:     X = NULL;
155:     PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Y],&Y));
156:     side = "L";
157:   } else {
158:     PetscCall(MatDenseGetArray(ds->omat[DS_MAT_X],&X));
159:     Y = NULL;
160:     side = "R";
161:   }
162:   XY = left? Y: X;
163:   if (ds->state <= DS_STATE_INTERMEDIATE) {
164:     PetscCall(DSSetIdentity(ds,DS_MAT_Q));
165:     PetscCall(DSSetIdentity(ds,DS_MAT_Z));
166:   }
167:   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
168:   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_B],&B));
169:   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Q],&Q));
170:   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Z],&Z));
171:   PetscCall(CleanDenseSchur(n,0,A,ld,B,ld,Q,ld,Z,ld));
172:   if (ds->state>=DS_STATE_CONDENSED) {
173:     /* DSSolve() has been called, backtransform with matrix Q */
174:     back = "B";
175:     PetscCall(PetscArraycpy(left?Y:X,left?Z:Q,ld*ld));
176:   } else {
177:     back = "A";
178:     PetscCall(DSSetState(ds,DS_STATE_CONDENSED));
179:   }
180: #if defined(PETSC_USE_COMPLEX)
181:   PetscCall(DSAllocateWork_Private(ds,2*ld,2*ld,0));
182:   PetscCallBLAS("LAPACKtgevc",LAPACKtgevc_(side,back,NULL,&n,A,&ld,B,&ld,Y,&ld,X,&ld,&n,&mout,ds->work,ds->rwork,&info));
183: #else
184:   PetscCall(DSAllocateWork_Private(ds,6*ld,0,0));
185:   PetscCallBLAS("LAPACKtgevc",LAPACKtgevc_(side,back,NULL,&n,A,&ld,B,&ld,Y,&ld,X,&ld,&n,&mout,ds->work,&info));
186: #endif
187:   SlepcCheckLapackInfo("tgevc",info);
188:   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q));
189:   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_Z],&Z));

191:   /* normalize eigenvectors */
192:   for (i=0;i<n;i++) {
193:     iscomplex = (i<n-1 && (A[i+1+i*ld]!=0.0 || B[i+1+i*ld]!=0.0))? PETSC_TRUE: PETSC_FALSE;
194:     norm = BLASnrm2_(&n,XY+i*ld,&inc);
195: #if !defined(PETSC_USE_COMPLEX)
196:     if (iscomplex) {
197:       tmp = BLASnrm2_(&n,XY+(i+1)*ld,&inc);
198:       norm = SlepcAbsEigenvalue(norm,tmp);
199:     }
200: #endif
201:     tmp = 1.0 / norm;
202:     PetscCallBLAS("BLASscal",BLASscal_(&n,&tmp,XY+i*ld,&inc));
203: #if !defined(PETSC_USE_COMPLEX)
204:     if (iscomplex) PetscCallBLAS("BLASscal",BLASscal_(&n,&tmp,XY+(i+1)*ld,&inc));
205: #endif
206:     if (iscomplex) i++;
207:   }
208:   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
209:   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_B],&B));
210:   PetscCall(MatDenseRestoreArray(ds->omat[left?DS_MAT_Y:DS_MAT_X],&XY));
211:   PetscFunctionReturn(PETSC_SUCCESS);
212: }

214: static PetscErrorCode DSVectors_GNHEP(DS ds,DSMatType mat,PetscInt *k,PetscReal *rnorm)
215: {
216:   PetscFunctionBegin;
217:   switch (mat) {
218:     case DS_MAT_X:
219:     case DS_MAT_Y:
220:       if (k) PetscCall(DSVectors_GNHEP_Eigen_Some(ds,k,rnorm,mat == DS_MAT_Y?PETSC_TRUE:PETSC_FALSE));
221:       else PetscCall(DSVectors_GNHEP_Eigen_All(ds,mat == DS_MAT_Y?PETSC_TRUE:PETSC_FALSE));
222:       break;
223:     default:
224:       SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
225:   }
226:   PetscFunctionReturn(PETSC_SUCCESS);
227: }

229: static PetscErrorCode DSSort_GNHEP_Arbitrary(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
230: {
231:   PetscInt       i;
232:   PetscBLASInt   info,n,ld,mout,lwork,liwork,*iwork,*selection,zero_=0,true_=1;
233:   PetscScalar    *S,*T,*Q,*Z,*work,*beta;

235:   PetscFunctionBegin;
236:   if (!ds->sc) PetscFunctionReturn(PETSC_SUCCESS);
237:   PetscCall(PetscBLASIntCast(ds->n,&n));
238:   PetscCall(PetscBLASIntCast(ds->ld,&ld));
239: #if !defined(PETSC_USE_COMPLEX)
240:   lwork = 4*n+16;
241: #else
242:   lwork = 1;
243: #endif
244:   liwork = 1;
245:   PetscCall(DSAllocateWork_Private(ds,lwork+2*n,0,liwork+n));
246:   beta      = ds->work;
247:   work      = ds->work + n;
248:   PetscCall(PetscBLASIntCast(ds->lwork-n,&lwork));
249:   selection = ds->iwork;
250:   iwork     = ds->iwork + n;
251:   PetscCall(PetscBLASIntCast(ds->liwork-n,&liwork));
252:   /* Compute the selected eigenvalue to be in the leading position */
253:   PetscCall(DSSortEigenvalues_Private(ds,rr,ri,ds->perm,PETSC_FALSE));
254:   PetscCall(PetscArrayzero(selection,n));
255:   for (i=0; i<*k; i++) selection[ds->perm[i]] = 1;
256:   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&S));
257:   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_B],&T));
258:   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Q],&Q));
259:   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Z],&Z));
260: #if !defined(PETSC_USE_COMPLEX)
261:   PetscCallBLAS("LAPACKtgsen",LAPACKtgsen_(&zero_,&true_,&true_,selection,&n,S,&ld,T,&ld,wr,wi,beta,Z,&ld,Q,&ld,&mout,NULL,NULL,NULL,work,&lwork,iwork,&liwork,&info));
262: #else
263:   PetscCallBLAS("LAPACKtgsen",LAPACKtgsen_(&zero_,&true_,&true_,selection,&n,S,&ld,T,&ld,wr,beta,Z,&ld,Q,&ld,&mout,NULL,NULL,NULL,work,&lwork,iwork,&liwork,&info));
264: #endif
265:   SlepcCheckLapackInfo("tgsen",info);
266:   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&S));
267:   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_B],&T));
268:   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q));
269:   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_Z],&Z));
270:   *k = mout;
271:   for (i=0;i<n;i++) {
272:     if (beta[i]==0.0) wr[i] = (PetscRealPart(wr[i])>0.0)? PETSC_MAX_REAL: PETSC_MIN_REAL;
273:     else wr[i] /= beta[i];
274: #if !defined(PETSC_USE_COMPLEX)
275:     if (beta[i]==0.0) wi[i] = (wi[i]>0.0)? PETSC_MAX_REAL: PETSC_MIN_REAL;
276:     else wi[i] /= beta[i];
277: #endif
278:   }
279:   PetscFunctionReturn(PETSC_SUCCESS);
280: }

282: static PetscErrorCode DSSort_GNHEP_Total(DS ds,PetscScalar *wr,PetscScalar *wi)
283: {
284:   PetscScalar    re;
285:   PetscInt       i,j,pos,result;
286:   PetscBLASInt   ifst,ilst,info,n,ld,one=1;
287:   PetscScalar    *S,*T,*Z,*Q;
288: #if !defined(PETSC_USE_COMPLEX)
289:   PetscBLASInt   lwork;
290:   PetscScalar    *work,a,safmin,scale1,scale2,im;
291: #endif

293:   PetscFunctionBegin;
294:   if (!ds->sc) PetscFunctionReturn(PETSC_SUCCESS);
295:   PetscCall(PetscBLASIntCast(ds->n,&n));
296:   PetscCall(PetscBLASIntCast(ds->ld,&ld));
297:   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&S));
298:   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_B],&T));
299:   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Q],&Q));
300:   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Z],&Z));
301: #if !defined(PETSC_USE_COMPLEX)
302:   lwork = -1;
303:   PetscCallBLAS("LAPACKtgexc",LAPACKtgexc_(&one,&one,&ld,NULL,&ld,NULL,&ld,NULL,&ld,NULL,&ld,&one,&one,&a,&lwork,&info));
304:   SlepcCheckLapackInfo("tgexc",info);
305:   safmin = LAPACKlamch_("S");
306:   PetscCall(PetscBLASIntCast((PetscInt)a,&lwork));
307:   PetscCall(DSAllocateWork_Private(ds,lwork,0,0));
308:   work = ds->work;
309: #endif
310:   /* selection sort */
311:   for (i=ds->l;i<n-1;i++) {
312:     re = wr[i];
313: #if !defined(PETSC_USE_COMPLEX)
314:     im = wi[i];
315: #endif
316:     pos = 0;
317:     j = i+1; /* j points to the next eigenvalue */
318: #if !defined(PETSC_USE_COMPLEX)
319:     if (im != 0) j=i+2;
320: #endif
321:     /* find minimum eigenvalue */
322:     for (;j<n;j++) {
323: #if !defined(PETSC_USE_COMPLEX)
324:       PetscCall(SlepcSCCompare(ds->sc,re,im,wr[j],wi[j],&result));
325: #else
326:       PetscCall(SlepcSCCompare(ds->sc,re,0.0,wr[j],0.0,&result));
327: #endif
328:       if (result > 0) {
329:         re = wr[j];
330: #if !defined(PETSC_USE_COMPLEX)
331:         im = wi[j];
332: #endif
333:         pos = j;
334:       }
335: #if !defined(PETSC_USE_COMPLEX)
336:       if (wi[j] != 0) j++;
337: #endif
338:     }
339:     if (pos) {
340:       /* interchange blocks */
341:       PetscCall(PetscBLASIntCast(pos+1,&ifst));
342:       PetscCall(PetscBLASIntCast(i+1,&ilst));
343: #if !defined(PETSC_USE_COMPLEX)
344:       PetscCallBLAS("LAPACKtgexc",LAPACKtgexc_(&one,&one,&n,S,&ld,T,&ld,Z,&ld,Q,&ld,&ifst,&ilst,work,&lwork,&info));
345: #else
346:       PetscCallBLAS("LAPACKtgexc",LAPACKtgexc_(&one,&one,&n,S,&ld,T,&ld,Z,&ld,Q,&ld,&ifst,&ilst,&info));
347: #endif
348:       SlepcCheckLapackInfo("tgexc",info);
349:       /* recover original eigenvalues from T and S matrices */
350:       for (j=i;j<n;j++) {
351: #if !defined(PETSC_USE_COMPLEX)
352:         if (j<n-1 && S[j*ld+j+1] != 0.0) {
353:           /* complex conjugate eigenvalue */
354:           PetscCallBLAS("LAPACKlag2",LAPACKlag2_(S+j*ld+j,&ld,T+j*ld+j,&ld,&safmin,&scale1,&scale2,&re,&a,&im));
355:           wr[j] = re / scale1;
356:           wi[j] = im / scale1;
357:           wr[j+1] = a / scale2;
358:           wi[j+1] = -wi[j];
359:           j++;
360:         } else
361: #endif
362:         {
363:           if (T[j*ld+j] == 0.0) wr[j] = (PetscRealPart(S[j*ld+j])>0.0)? PETSC_MAX_REAL: PETSC_MIN_REAL;
364:           else wr[j] = S[j*ld+j] / T[j*ld+j];
365: #if !defined(PETSC_USE_COMPLEX)
366:           wi[j] = 0.0;
367: #endif
368:         }
369:       }
370:     }
371: #if !defined(PETSC_USE_COMPLEX)
372:     if (wi[i] != 0.0) i++;
373: #endif
374:   }
375:   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&S));
376:   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_B],&T));
377:   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q));
378:   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_Z],&Z));
379:   PetscFunctionReturn(PETSC_SUCCESS);
380: }

382: static PetscErrorCode DSSort_GNHEP(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
383: {
384:   PetscFunctionBegin;
385:   if (!rr || wr == rr) PetscCall(DSSort_GNHEP_Total(ds,wr,wi));
386:   else PetscCall(DSSort_GNHEP_Arbitrary(ds,wr,wi,rr,ri,k));
387:   PetscFunctionReturn(PETSC_SUCCESS);
388: }

390: static PetscErrorCode DSUpdateExtraRow_GNHEP(DS ds)
391: {
392:   PetscInt          i;
393:   PetscBLASInt      n,ld,incx=1;
394:   PetscScalar       *A,*B,*x,*y,one=1.0,zero=0.0;
395:   const PetscScalar *Q;

397:   PetscFunctionBegin;
398:   PetscCall(PetscBLASIntCast(ds->n,&n));
399:   PetscCall(PetscBLASIntCast(ds->ld,&ld));
400:   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
401:   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_B],&B));
402:   PetscCall(MatDenseGetArrayRead(ds->omat[DS_MAT_Q],&Q));
403:   PetscCall(DSAllocateWork_Private(ds,2*ld,0,0));
404:   x = ds->work;
405:   y = ds->work+ld;
406:   for (i=0;i<n;i++) x[i] = PetscConj(A[n+i*ld]);
407:   PetscCallBLAS("BLASgemv",BLASgemv_("C",&n,&n,&one,Q,&ld,x,&incx,&zero,y,&incx));
408:   for (i=0;i<n;i++) A[n+i*ld] = PetscConj(y[i]);
409:   for (i=0;i<n;i++) x[i] = PetscConj(B[n+i*ld]);
410:   PetscCallBLAS("BLASgemv",BLASgemv_("C",&n,&n,&one,Q,&ld,x,&incx,&zero,y,&incx));
411:   for (i=0;i<n;i++) B[n+i*ld] = PetscConj(y[i]);
412:   ds->k = n;
413:   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
414:   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_B],&B));
415:   PetscCall(MatDenseRestoreArrayRead(ds->omat[DS_MAT_Q],&Q));
416:   PetscFunctionReturn(PETSC_SUCCESS);
417: }

419: /*
420:    Write zeros from the column k to n in the lower triangular part of the
421:    matrices S and T, and inside 2-by-2 diagonal blocks of T in order to
422:    make (S,T) a valid Schur decompositon.
423: */
424: static PetscErrorCode CleanDenseSchur(PetscInt n,PetscInt k,PetscScalar *S,PetscInt ldS,PetscScalar *T,PetscInt ldT,PetscScalar *X,PetscInt ldX,PetscScalar *Y,PetscInt ldY)
425: {
426:   PetscInt       i;
427: #if defined(PETSC_USE_COMPLEX)
428:   PetscInt       j;
429:   PetscScalar    s;
430: #else
431:   PetscBLASInt   ldS_,ldT_,n_i,n_i_2,one=1,n_,i_2,i_;
432:   PetscScalar    b11,b22,sr,cr,sl,cl;
433: #endif

435:   PetscFunctionBegin;
436: #if defined(PETSC_USE_COMPLEX)
437:   for (i=k; i<n; i++) {
438:     /* Some functions need the diagonal elements in T be real */
439:     if (T && PetscImaginaryPart(T[ldT*i+i]) != 0.0) {
440:       s = PetscConj(T[ldT*i+i])/PetscAbsScalar(T[ldT*i+i]);
441:       for (j=0;j<=i;j++) {
442:         T[ldT*i+j] *= s;
443:         S[ldS*i+j] *= s;
444:       }
445:       T[ldT*i+i] = PetscRealPart(T[ldT*i+i]);
446:       if (X) for (j=0;j<n;j++) X[ldX*i+j] *= s;
447:     }
448:     j = i+1;
449:     if (j<n) {
450:       S[ldS*i+j] = 0.0;
451:       if (T) T[ldT*i+j] = 0.0;
452:     }
453:   }
454: #else
455:   PetscCall(PetscBLASIntCast(ldS,&ldS_));
456:   PetscCall(PetscBLASIntCast(ldT,&ldT_));
457:   PetscCall(PetscBLASIntCast(n,&n_));
458:   for (i=k;i<n-1;i++) {
459:     if (S[ldS*i+i+1] != 0.0) {
460:       /* Check if T(i+1,i) and T(i,i+1) are zero */
461:       if (T[ldT*(i+1)+i] != 0.0 || T[ldT*i+i+1] != 0.0) {
462:         /* Check if T(i+1,i) and T(i,i+1) are negligible */
463:         if (PetscAbs(T[ldT*(i+1)+i])+PetscAbs(T[ldT*i+i+1]) < (PetscAbs(T[ldT*i+i])+PetscAbs(T[ldT*(i+1)+i+1]))*PETSC_MACHINE_EPSILON) {
464:           T[ldT*i+i+1] = 0.0;
465:           T[ldT*(i+1)+i] = 0.0;
466:         } else {
467:           /* If one of T(i+1,i) or T(i,i+1) is negligible, we make zero the other element */
468:           if (PetscAbs(T[ldT*i+i+1]) < (PetscAbs(T[ldT*i+i])+PetscAbs(T[ldT*(i+1)+i+1])+PetscAbs(T[ldT*(i+1)+i]))*PETSC_MACHINE_EPSILON) {
469:             PetscCallBLAS("LAPACKlasv2",LAPACKlasv2_(&T[ldT*i+i],&T[ldT*(i+1)+i],&T[ldT*(i+1)+i+1],&b22,&b11,&sl,&cl,&sr,&cr));
470:           } else if (PetscAbs(T[ldT*(i+1)+i]) < (PetscAbs(T[ldT*i+i])+PetscAbs(T[ldT*(i+1)+i+1])+PetscAbs(T[ldT*i+i+1]))*PETSC_MACHINE_EPSILON) {
471:             PetscCallBLAS("LAPACKlasv2",LAPACKlasv2_(&T[ldT*i+i],&T[ldT*i+i+1],&T[ldT*(i+1)+i+1],&b22,&b11,&sr,&cr,&sl,&cl));
472:           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported format. Call DSSolve before this function");
473:           PetscCall(PetscBLASIntCast(n-i,&n_i));
474:           n_i_2 = n_i - 2;
475:           PetscCall(PetscBLASIntCast(i+2,&i_2));
476:           PetscCall(PetscBLASIntCast(i,&i_));
477:           if (b11 < 0.0) {
478:             cr = -cr; sr = -sr;
479:             b11 = -b11; b22 = -b22;
480:           }
481:           PetscCallBLAS("BLASrot",BLASrot_(&n_i,&S[ldS*i+i],&ldS_,&S[ldS*i+i+1],&ldS_,&cl,&sl));
482:           PetscCallBLAS("BLASrot",BLASrot_(&i_2,&S[ldS*i],&one,&S[ldS*(i+1)],&one,&cr,&sr));
483:           PetscCallBLAS("BLASrot",BLASrot_(&n_i_2,&T[ldT*(i+2)+i],&ldT_,&T[ldT*(i+2)+i+1],&ldT_,&cl,&sl));
484:           PetscCallBLAS("BLASrot",BLASrot_(&i_,&T[ldT*i],&one,&T[ldT*(i+1)],&one,&cr,&sr));
485:           if (X) PetscCallBLAS("BLASrot",BLASrot_(&n_,&X[ldX*i],&one,&X[ldX*(i+1)],&one,&cr,&sr));
486:           if (Y) PetscCallBLAS("BLASrot",BLASrot_(&n_,&Y[ldY*i],&one,&Y[ldY*(i+1)],&one,&cl,&sl));
487:           T[ldT*i+i] = b11; T[ldT*i+i+1] = 0.0;
488:           T[ldT*(i+1)+i] = 0.0; T[ldT*(i+1)+i+1] = b22;
489:         }
490:       }
491:       i++;
492:     }
493:   }
494: #endif
495:   PetscFunctionReturn(PETSC_SUCCESS);
496: }

498: static PetscErrorCode DSSolve_GNHEP(DS ds,PetscScalar *wr,PetscScalar *wi)
499: {
500:   PetscScalar    *work,*beta,a;
501:   PetscInt       i;
502:   PetscBLASInt   lwork,info,n,ld,iaux;
503:   PetscScalar    *A,*B,*Z,*Q;
504:   PetscBool      usegges3=(ds->method==1)?PETSC_TRUE:PETSC_FALSE;

506:   PetscFunctionBegin;
507: #if !defined(PETSC_USE_COMPLEX)
508:   PetscAssertPointer(wi,3);
509: #endif
510:   PetscCall(PetscBLASIntCast(ds->n,&n));
511:   PetscCall(PetscBLASIntCast(ds->ld,&ld));
512:   lwork = -1;
513:   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
514:   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_B],&B));
515:   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Q],&Q));
516:   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Z],&Z));
517: #if !defined(PETSC_USE_COMPLEX)
518:   if (usegges3) PetscCallBLAS("LAPACKgges3",LAPACKgges3_("V","V","N",NULL,&n,A,&ld,B,&ld,&iaux,wr,wi,NULL,Z,&ld,Q,&ld,&a,&lwork,NULL,&info));
519:   else PetscCallBLAS("LAPACKgges",LAPACKgges_("V","V","N",NULL,&n,A,&ld,B,&ld,&iaux,wr,wi,NULL,Z,&ld,Q,&ld,&a,&lwork,NULL,&info));
520:   PetscCall(PetscBLASIntCast((PetscInt)a,&lwork));
521:   PetscCall(DSAllocateWork_Private(ds,lwork+ld,0,0));
522:   beta = ds->work;
523:   work = beta+ds->n;
524:   PetscCall(PetscBLASIntCast(ds->lwork-ds->n,&lwork));
525:   if (usegges3) PetscCallBLAS("LAPACKgges3",LAPACKgges3_("V","V","N",NULL,&n,A,&ld,B,&ld,&iaux,wr,wi,beta,Z,&ld,Q,&ld,work,&lwork,NULL,&info));
526:   else PetscCallBLAS("LAPACKgges",LAPACKgges_("V","V","N",NULL,&n,A,&ld,B,&ld,&iaux,wr,wi,beta,Z,&ld,Q,&ld,work,&lwork,NULL,&info));
527: #else
528:   if (usegges3) PetscCallBLAS("LAPACKgges3",LAPACKgges3_("V","V","N",NULL,&n,A,&ld,B,&ld,&iaux,wr,NULL,Z,&ld,Q,&ld,&a,&lwork,NULL,NULL,&info));
529:   else PetscCallBLAS("LAPACKgges",LAPACKgges_("V","V","N",NULL,&n,A,&ld,B,&ld,&iaux,wr,NULL,Z,&ld,Q,&ld,&a,&lwork,NULL,NULL,&info));
530:   PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(a),&lwork));
531:   PetscCall(DSAllocateWork_Private(ds,lwork+ld,8*ld,0));
532:   beta = ds->work;
533:   work = beta+ds->n;
534:   PetscCall(PetscBLASIntCast(ds->lwork-ds->n,&lwork));
535:   if (usegges3) PetscCallBLAS("LAPACKgges3",LAPACKgges3_("V","V","N",NULL,&n,A,&ld,B,&ld,&iaux,wr,beta,Z,&ld,Q,&ld,work,&lwork,ds->rwork,NULL,&info));
536:   else PetscCallBLAS("LAPACKgges",LAPACKgges_("V","V","N",NULL,&n,A,&ld,B,&ld,&iaux,wr,beta,Z,&ld,Q,&ld,work,&lwork,ds->rwork,NULL,&info));
537: #endif
538:   SlepcCheckLapackInfo(usegges3?"gges3":"gges",info);
539:   for (i=0;i<n;i++) {
540:     if (beta[i]==0.0) wr[i] = (PetscRealPart(wr[i])>0.0)? PETSC_MAX_REAL: PETSC_MIN_REAL;
541:     else wr[i] /= beta[i];
542: #if !defined(PETSC_USE_COMPLEX)
543:     if (beta[i]==0.0) wi[i] = (wi[i]>0.0)? PETSC_MAX_REAL: PETSC_MIN_REAL;
544:     else wi[i] /= beta[i];
545: #else
546:     if (wi) wi[i] = 0.0;
547: #endif
548:   }
549:   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
550:   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_B],&B));
551:   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q));
552:   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_Z],&Z));
553:   PetscFunctionReturn(PETSC_SUCCESS);
554: }

556: #if !defined(PETSC_HAVE_MPIUNI)
557: static PetscErrorCode DSSynchronize_GNHEP(DS ds,PetscScalar eigr[],PetscScalar eigi[])
558: {
559:   PetscInt       ld=ds->ld,l=ds->l,k;
560:   PetscMPIInt    n,rank,off=0,size,ldn;
561:   PetscScalar    *A,*B,*Q,*Z;

563:   PetscFunctionBegin;
564:   k = 2*(ds->n-l)*ld;
565:   if (ds->state>DS_STATE_RAW) k += 2*(ds->n-l)*ld;
566:   if (eigr) k += (ds->n-l);
567:   if (eigi) k += (ds->n-l);
568:   PetscCall(DSAllocateWork_Private(ds,k,0,0));
569:   PetscCall(PetscMPIIntCast(k*sizeof(PetscScalar),&size));
570:   PetscCall(PetscMPIIntCast(ds->n-l,&n));
571:   PetscCall(PetscMPIIntCast(ld*(ds->n-l),&ldn));
572:   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
573:   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_B],&B));
574:   if (ds->state>DS_STATE_RAW) {
575:     PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Q],&Q));
576:     PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Z],&Z));
577:   }
578:   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)ds),&rank));
579:   if (!rank) {
580:     PetscCallMPI(MPI_Pack(A+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds)));
581:     PetscCallMPI(MPI_Pack(B+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds)));
582:     if (ds->state>DS_STATE_RAW) {
583:       PetscCallMPI(MPI_Pack(Q+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds)));
584:       PetscCallMPI(MPI_Pack(Z+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds)));
585:     }
586:     if (eigr) PetscCallMPI(MPI_Pack(eigr+l,n,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds)));
587: #if !defined(PETSC_USE_COMPLEX)
588:     if (eigi) PetscCallMPI(MPI_Pack(eigi+l,n,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds)));
589: #endif
590:   }
591:   PetscCallMPI(MPI_Bcast(ds->work,size,MPI_BYTE,0,PetscObjectComm((PetscObject)ds)));
592:   if (rank) {
593:     PetscCallMPI(MPI_Unpack(ds->work,size,&off,A+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds)));
594:     PetscCallMPI(MPI_Unpack(ds->work,size,&off,B+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds)));
595:     if (ds->state>DS_STATE_RAW) {
596:       PetscCallMPI(MPI_Unpack(ds->work,size,&off,Q+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds)));
597:       PetscCallMPI(MPI_Unpack(ds->work,size,&off,Z+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds)));
598:     }
599:     if (eigr) PetscCallMPI(MPI_Unpack(ds->work,size,&off,eigr+l,n,MPIU_SCALAR,PetscObjectComm((PetscObject)ds)));
600: #if !defined(PETSC_USE_COMPLEX)
601:     if (eigi) PetscCallMPI(MPI_Unpack(ds->work,size,&off,eigi+l,n,MPIU_SCALAR,PetscObjectComm((PetscObject)ds)));
602: #endif
603:   }
604:   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
605:   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_B],&B));
606:   if (ds->state>DS_STATE_RAW) {
607:     PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q));
608:     PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_Z],&Z));
609:   }
610:   PetscFunctionReturn(PETSC_SUCCESS);
611: }
612: #endif

614: static PetscErrorCode DSTruncate_GNHEP(DS ds,PetscInt n,PetscBool trim)
615: {
616:   PetscInt    i,ld=ds->ld,l=ds->l;
617:   PetscScalar *A,*B;

619:   PetscFunctionBegin;
620:   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
621:   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_B],&B));
622: #if defined(PETSC_USE_DEBUG)
623:   /* make sure diagonal 2x2 block is not broken */
624:   PetscCheck(ds->state<DS_STATE_CONDENSED || n==0 || n==ds->n || (A[n+(n-1)*ld]==0.0 && B[n+(n-1)*ld]==0.0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"The given size would break a 2x2 block, call DSGetTruncateSize() first");
625: #endif
626:   if (trim) {
627:     if (ds->extrarow) {   /* clean extra row */
628:       for (i=l;i<ds->n;i++) A[ds->n+i*ld] = 0.0;
629:       for (i=l;i<ds->n;i++) B[ds->n+i*ld] = 0.0;
630:     }
631:     ds->l = 0;
632:     ds->k = 0;
633:     ds->n = n;
634:     ds->t = ds->n;   /* truncated length equal to the new dimension */
635:   } else {
636:     if (ds->extrarow && ds->k==ds->n) {
637:       /* copy entries of extra row to the new position, then clean last row */
638:       for (i=l;i<n;i++) A[n+i*ld] = A[ds->n+i*ld];
639:       for (i=l;i<ds->n;i++) A[ds->n+i*ld] = 0.0;
640:       for (i=l;i<n;i++) B[n+i*ld] = B[ds->n+i*ld];
641:       for (i=l;i<ds->n;i++) B[ds->n+i*ld] = 0.0;
642:     }
643:     ds->k = (ds->extrarow)? n: 0;
644:     ds->t = ds->n;   /* truncated length equal to previous dimension */
645:     ds->n = n;
646:   }
647:   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
648:   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_B],&B));
649:   PetscFunctionReturn(PETSC_SUCCESS);
650: }

652: /*MC
653:    DSGNHEP - Dense Generalized Non-Hermitian Eigenvalue Problem.

655:    Level: beginner

657:    Notes:
658:    The problem is expressed as A*X = B*X*Lambda, where (A,B) is the input
659:    matrix pencil. Lambda is a diagonal matrix whose diagonal elements are the
660:    arguments of DSSolve(). After solve, (A,B) is overwritten with the
661:    generalized (real) Schur form (S,T) = (Z'*A*Q,Z'*B*Q), with the first
662:    matrix being upper quasi-triangular and the second one triangular.

664:    Used DS matrices:
665: +  DS_MAT_A - first problem matrix
666: .  DS_MAT_B - second problem matrix
667: .  DS_MAT_Q - first orthogonal/unitary transformation that reduces to
668:    generalized (real) Schur form
669: -  DS_MAT_Z - second orthogonal/unitary transformation that reduces to
670:    generalized (real) Schur form

672:    Implemented methods:
673: +  0 - QZ iteration (_gges)
674: -  1 - blocked QZ iteration (_gges3, if available)

676: .seealso: DSCreate(), DSSetType(), DSType
677: M*/
678: SLEPC_EXTERN PetscErrorCode DSCreate_GNHEP(DS ds)
679: {
680:   PetscFunctionBegin;
681:   ds->ops->allocate        = DSAllocate_GNHEP;
682:   ds->ops->view            = DSView_GNHEP;
683:   ds->ops->vectors         = DSVectors_GNHEP;
684:   ds->ops->solve[0]        = DSSolve_GNHEP;
685: #if !defined(SLEPC_MISSING_LAPACK_GGES3)
686:   ds->ops->solve[1]        = DSSolve_GNHEP;
687: #endif
688:   ds->ops->sort            = DSSort_GNHEP;
689: #if !defined(PETSC_HAVE_MPIUNI)
690:   ds->ops->synchronize     = DSSynchronize_GNHEP;
691: #endif
692:   ds->ops->gettruncatesize = DSGetTruncateSize_Default;
693:   ds->ops->truncate        = DSTruncate_GNHEP;
694:   ds->ops->update          = DSUpdateExtraRow_GNHEP;
695:   PetscFunctionReturn(PETSC_SUCCESS);
696: }