Actual source code: dsnhep.c

slepc-main 2024-12-17
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */

 11: #include <slepc/private/dsimpl.h>
 12: #include <slepcblaslapack.h>

 14: static PetscErrorCode DSAllocate_NHEP(DS ds,PetscInt ld)
 15: {
 16:   PetscFunctionBegin;
 17:   PetscCall(DSAllocateMat_Private(ds,DS_MAT_A));
 18:   PetscCall(DSAllocateMat_Private(ds,DS_MAT_Q));
 19:   PetscCall(PetscFree(ds->perm));
 20:   PetscCall(PetscMalloc1(ld,&ds->perm));
 21:   PetscFunctionReturn(PETSC_SUCCESS);
 22: }

 24: static PetscErrorCode DSView_NHEP(DS ds,PetscViewer viewer)
 25: {
 26:   PetscViewerFormat format;

 28:   PetscFunctionBegin;
 29:   PetscCall(PetscViewerGetFormat(viewer,&format));
 30:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) PetscFunctionReturn(PETSC_SUCCESS);
 31:   PetscCall(DSViewMat(ds,viewer,DS_MAT_A));
 32:   if (ds->state>DS_STATE_INTERMEDIATE) PetscCall(DSViewMat(ds,viewer,DS_MAT_Q));
 33:   if (ds->omat[DS_MAT_X]) PetscCall(DSViewMat(ds,viewer,DS_MAT_X));
 34:   if (ds->omat[DS_MAT_Y]) PetscCall(DSViewMat(ds,viewer,DS_MAT_Y));
 35:   PetscFunctionReturn(PETSC_SUCCESS);
 36: }

 38: static PetscErrorCode DSVectors_NHEP_Refined_Some(DS ds,PetscInt *k,PetscReal *rnorm,PetscBool left)
 39: {
 40:   PetscInt          i,j;
 41:   PetscBLASInt      info,ld,n,n1,lwork,inc=1;
 42:   PetscScalar       sdummy,done=1.0,zero=0.0;
 43:   PetscReal         *sigma;
 44:   PetscBool         iscomplex = PETSC_FALSE;
 45:   PetscScalar       *X,*W;
 46:   const PetscScalar *A,*Q;

 48:   PetscFunctionBegin;
 49:   PetscCheck(!left,PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Not implemented for left vectors");
 50:   PetscCall(PetscBLASIntCast(ds->n,&n));
 51:   PetscCall(PetscBLASIntCast(ds->ld,&ld));
 52:   n1 = n+1;
 53:   PetscCall(DSAllocateWork_Private(ds,5*ld,6*ld,0));
 54:   PetscCall(DSAllocateMat_Private(ds,DS_MAT_W));
 55:   lwork = 5*ld;
 56:   sigma = ds->rwork+5*ld;

 58:   /* build A-w*I in W */
 59:   PetscCall(MatDenseGetArrayRead(ds->omat[DS_MAT_A],&A));
 60:   PetscCall(MatDenseGetArrayWrite(ds->omat[DS_MAT_W],&W));
 61:   if ((*k)<n-1 && A[(*k)+1+(*k)*ld]!=0.0) iscomplex = PETSC_TRUE;
 62:   PetscCheck(!iscomplex,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complex eigenvalues yet");
 63:   for (j=0;j<n;j++)
 64:     for (i=0;i<=n;i++)
 65:       W[i+j*ld] = A[i+j*ld];
 66:   for (i=0;i<n;i++)
 67:     W[i+i*ld] -= A[(*k)+(*k)*ld];
 68:   PetscCall(MatDenseRestoreArrayRead(ds->omat[DS_MAT_A],&A));

 70:   /* compute SVD of W */
 71: #if !defined(PETSC_USE_COMPLEX)
 72:   PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&n1,&n,W,&ld,sigma,&sdummy,&ld,&sdummy,&ld,ds->work,&lwork,&info));
 73: #else
 74:   PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&n1,&n,W,&ld,sigma,&sdummy,&ld,&sdummy,&ld,ds->work,&lwork,ds->rwork,&info));
 75: #endif
 76:   SlepcCheckLapackInfo("gesvd",info);

 78:   /* the smallest singular value is the new error estimate */
 79:   if (rnorm) *rnorm = sigma[n-1];

 81:   /* update vector with right singular vector associated to smallest singular value,
 82:      accumulating the transformation matrix Q */
 83:   PetscCall(MatDenseGetArrayRead(ds->omat[DS_MAT_Q],&Q));
 84:   PetscCall(MatDenseGetArray(ds->omat[left?DS_MAT_Y:DS_MAT_X],&X));
 85:   PetscCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,W+n-1,&ld,&zero,X+(*k)*ld,&inc));
 86:   PetscCall(MatDenseRestoreArrayRead(ds->omat[DS_MAT_Q],&Q));
 87:   PetscCall(MatDenseRestoreArray(ds->omat[left?DS_MAT_Y:DS_MAT_X],&X));
 88:   PetscCall(MatDenseRestoreArrayWrite(ds->omat[DS_MAT_W],&W));
 89:   PetscFunctionReturn(PETSC_SUCCESS);
 90: }

 92: static PetscErrorCode DSVectors_NHEP_Refined_All(DS ds,PetscBool left)
 93: {
 94:   PetscInt       i;

 96:   PetscFunctionBegin;
 97:   for (i=0;i<ds->n;i++) PetscCall(DSVectors_NHEP_Refined_Some(ds,&i,NULL,left));
 98:   PetscFunctionReturn(PETSC_SUCCESS);
 99: }

101: static PetscErrorCode DSVectors_NHEP_Eigen_Some(DS ds,PetscInt *k,PetscReal *rnorm,PetscBool left)
102: {
103:   PetscInt          i;
104:   PetscBLASInt      mm=1,mout,info,ld,n,*select,inc=1,cols=1,zero=0;
105:   PetscScalar       sone=1.0,szero=0.0;
106:   PetscReal         norm,done=1.0;
107:   PetscBool         iscomplex = PETSC_FALSE;
108:   PetscScalar       *X,*Y;
109:   const PetscScalar *A,*Q;

111:   PetscFunctionBegin;
112:   PetscCall(PetscBLASIntCast(ds->n,&n));
113:   PetscCall(PetscBLASIntCast(ds->ld,&ld));
114:   PetscCall(DSAllocateWork_Private(ds,0,0,ld));
115:   select = ds->iwork;
116:   for (i=0;i<n;i++) select[i] = (PetscBLASInt)PETSC_FALSE;

118:   /* compute k-th eigenvector Y of A */
119:   PetscCall(MatDenseGetArrayRead(ds->omat[DS_MAT_A],&A));
120:   PetscCall(MatDenseGetArray(ds->omat[left?DS_MAT_Y:DS_MAT_X],&X));
121:   Y = X+(*k)*ld;
122:   select[*k] = (PetscBLASInt)PETSC_TRUE;
123: #if !defined(PETSC_USE_COMPLEX)
124:   if ((*k)<n-1 && A[(*k)+1+(*k)*ld]!=0.0) iscomplex = PETSC_TRUE;
125:   mm = iscomplex? 2: 1;
126:   if (iscomplex) select[(*k)+1] = (PetscBLASInt)PETSC_TRUE;
127:   PetscCall(DSAllocateWork_Private(ds,3*ld,0,0));
128:   PetscCallBLAS("LAPACKtrevc",LAPACKtrevc_(left?"L":"R","S",select,&n,(PetscScalar*)A,&ld,Y,&ld,Y,&ld,&mm,&mout,ds->work,&info));
129: #else
130:   PetscCall(DSAllocateWork_Private(ds,2*ld,ld,0));
131:   PetscCallBLAS("LAPACKtrevc",LAPACKtrevc_(left?"L":"R","S",select,&n,(PetscScalar*)A,&ld,Y,&ld,Y,&ld,&mm,&mout,ds->work,ds->rwork,&info));
132: #endif
133:   SlepcCheckLapackInfo("trevc",info);
134:   PetscCheck(mout==mm,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Inconsistent arguments");
135:   PetscCall(MatDenseRestoreArrayRead(ds->omat[DS_MAT_A],&A));

137:   /* accumulate and normalize eigenvectors */
138:   if (ds->state>=DS_STATE_CONDENSED) {
139:     PetscCall(MatDenseGetArrayRead(ds->omat[DS_MAT_Q],&Q));
140:     PetscCall(PetscArraycpy(ds->work,Y,mout*ld));
141:     PetscCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&sone,Q,&ld,ds->work,&inc,&szero,Y,&inc));
142: #if !defined(PETSC_USE_COMPLEX)
143:     if (iscomplex) PetscCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&sone,Q,&ld,ds->work+ld,&inc,&szero,Y+ld,&inc));
144: #endif
145:     PetscCall(MatDenseRestoreArrayRead(ds->omat[DS_MAT_Q],&Q));
146:     cols = 1;
147:     norm = BLASnrm2_(&n,Y,&inc);
148: #if !defined(PETSC_USE_COMPLEX)
149:     if (iscomplex) {
150:       norm = SlepcAbsEigenvalue(norm,BLASnrm2_(&n,Y+ld,&inc));
151:       cols = 2;
152:     }
153: #endif
154:     PetscCallBLAS("LAPACKlascl",LAPACKlascl_("G",&zero,&zero,&norm,&done,&n,&cols,Y,&ld,&info));
155:     SlepcCheckLapackInfo("lascl",info);
156:   }

158:   /* set output arguments */
159:   if (iscomplex) (*k)++;
160:   if (rnorm) {
161:     if (iscomplex) *rnorm = SlepcAbsEigenvalue(Y[n-1],Y[n-1+ld]);
162:     else *rnorm = PetscAbsScalar(Y[n-1]);
163:   }
164:   PetscCall(MatDenseRestoreArray(ds->omat[left?DS_MAT_Y:DS_MAT_X],&X));
165:   PetscFunctionReturn(PETSC_SUCCESS);
166: }

168: static PetscErrorCode DSVectors_NHEP_Eigen_All(DS ds,PetscBool left)
169: {
170:   PetscInt          i;
171:   PetscBLASInt      n,ld,mout,info,inc=1,cols,zero=0;
172:   PetscBool         iscomplex;
173:   PetscScalar       *X,*Y,*Z;
174:   const PetscScalar *A,*Q;
175:   PetscReal         norm,done=1.0;
176:   const char        *side,*back;

178:   PetscFunctionBegin;
179:   PetscCall(MatDenseGetArrayRead(ds->omat[DS_MAT_A],&A));
180:   PetscCall(PetscBLASIntCast(ds->n,&n));
181:   PetscCall(PetscBLASIntCast(ds->ld,&ld));
182:   if (left) {
183:     X = NULL;
184:     PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Y],&Y));
185:     side = "L";
186:   } else {
187:     PetscCall(MatDenseGetArray(ds->omat[DS_MAT_X],&X));
188:     Y = NULL;
189:     side = "R";
190:   }
191:   Z = left? Y: X;
192:   if (ds->state>=DS_STATE_CONDENSED) {
193:     /* DSSolve() has been called, backtransform with matrix Q */
194:     back = "B";
195:     PetscCall(MatDenseGetArrayRead(ds->omat[DS_MAT_Q],&Q));
196:     PetscCall(PetscArraycpy(Z,Q,ld*ld));
197:     PetscCall(MatDenseRestoreArrayRead(ds->omat[DS_MAT_Q],&Q));
198:   } else back = "A";
199: #if !defined(PETSC_USE_COMPLEX)
200:   PetscCall(DSAllocateWork_Private(ds,3*ld,0,0));
201:   PetscCallBLAS("LAPACKtrevc",LAPACKtrevc_(side,back,NULL,&n,(PetscScalar*)A,&ld,Y,&ld,X,&ld,&n,&mout,ds->work,&info));
202: #else
203:   PetscCall(DSAllocateWork_Private(ds,2*ld,ld,0));
204:   PetscCallBLAS("LAPACKtrevc",LAPACKtrevc_(side,back,NULL,&n,(PetscScalar*)A,&ld,Y,&ld,X,&ld,&n,&mout,ds->work,ds->rwork,&info));
205: #endif
206:   SlepcCheckLapackInfo("trevc",info);

208:   /* normalize eigenvectors */
209:   for (i=0;i<n;i++) {
210:     iscomplex = (i<n-1 && A[i+1+i*ld]!=0.0)? PETSC_TRUE: PETSC_FALSE;
211:     cols = 1;
212:     norm = BLASnrm2_(&n,Z+i*ld,&inc);
213: #if !defined(PETSC_USE_COMPLEX)
214:     if (iscomplex) {
215:       norm = SlepcAbsEigenvalue(norm,BLASnrm2_(&n,Z+(i+1)*ld,&inc));
216:       cols = 2;
217:     }
218: #endif
219:     PetscCallBLAS("LAPACKlascl",LAPACKlascl_("G",&zero,&zero,&norm,&done,&n,&cols,Z+i*ld,&ld,&info));
220:     SlepcCheckLapackInfo("lascl",info);
221:     if (iscomplex) i++;
222:   }
223:   PetscCall(MatDenseRestoreArrayRead(ds->omat[DS_MAT_A],&A));
224:   PetscCall(MatDenseRestoreArray(ds->omat[left?DS_MAT_Y:DS_MAT_X],&Z));
225:   PetscFunctionReturn(PETSC_SUCCESS);
226: }

228: static PetscErrorCode DSVectors_NHEP(DS ds,DSMatType mat,PetscInt *j,PetscReal *rnorm)
229: {
230:   PetscFunctionBegin;
231:   switch (mat) {
232:     case DS_MAT_X:
233:       if (ds->refined) {
234:         PetscCheck(ds->extrarow,PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Refined vectors require activating the extra row");
235:         if (j) PetscCall(DSVectors_NHEP_Refined_Some(ds,j,rnorm,PETSC_FALSE));
236:         else PetscCall(DSVectors_NHEP_Refined_All(ds,PETSC_FALSE));
237:       } else {
238:         if (j) PetscCall(DSVectors_NHEP_Eigen_Some(ds,j,rnorm,PETSC_FALSE));
239:         else PetscCall(DSVectors_NHEP_Eigen_All(ds,PETSC_FALSE));
240:       }
241:       break;
242:     case DS_MAT_Y:
243:       PetscCheck(!ds->refined,PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Not implemented yet");
244:       if (j) PetscCall(DSVectors_NHEP_Eigen_Some(ds,j,rnorm,PETSC_TRUE));
245:       else PetscCall(DSVectors_NHEP_Eigen_All(ds,PETSC_TRUE));
246:       break;
247:     case DS_MAT_U:
248:     case DS_MAT_V:
249:       SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Not implemented yet");
250:     default:
251:       SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
252:   }
253:   PetscFunctionReturn(PETSC_SUCCESS);
254: }

256: static PetscErrorCode DSSort_NHEP_Arbitrary(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
257: {
258:   PetscInt       i;
259:   PetscBLASInt   info,n,ld,mout,lwork,*selection;
260:   PetscScalar    *T,*Q,*work;
261:   PetscReal      dummy;
262: #if !defined(PETSC_USE_COMPLEX)
263:   PetscBLASInt   *iwork,liwork;
264: #endif

266:   PetscFunctionBegin;
267:   PetscCheck(k,PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_WRONG,"Must supply argument k");
268:   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&T));
269:   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Q],&Q));
270:   PetscCall(PetscBLASIntCast(ds->n,&n));
271:   PetscCall(PetscBLASIntCast(ds->ld,&ld));
272: #if !defined(PETSC_USE_COMPLEX)
273:   lwork = n;
274:   liwork = 1;
275:   PetscCall(DSAllocateWork_Private(ds,lwork,0,liwork+n));
276:   work = ds->work;
277:   PetscCall(PetscBLASIntCast(ds->lwork,&lwork));
278:   selection = ds->iwork;
279:   iwork = ds->iwork + n;
280:   PetscCall(PetscBLASIntCast(ds->liwork-n,&liwork));
281: #else
282:   lwork = 1;
283:   PetscCall(DSAllocateWork_Private(ds,lwork,0,n));
284:   work = ds->work;
285:   selection = ds->iwork;
286: #endif
287:   /* Compute the selected eigenvalue to be in the leading position */
288:   PetscCall(DSSortEigenvalues_Private(ds,rr,ri,ds->perm,PETSC_FALSE));
289:   PetscCall(PetscArrayzero(selection,n));
290:   for (i=0;i<*k;i++) selection[ds->perm[i]] = 1;
291: #if !defined(PETSC_USE_COMPLEX)
292:   PetscCallBLAS("LAPACKtrsen",LAPACKtrsen_("N","V",selection,&n,T,&ld,Q,&ld,wr,wi,&mout,&dummy,&dummy,work,&lwork,iwork,&liwork,&info));
293: #else
294:   PetscCallBLAS("LAPACKtrsen",LAPACKtrsen_("N","V",selection,&n,T,&ld,Q,&ld,wr,&mout,&dummy,&dummy,work,&lwork,&info));
295: #endif
296:   SlepcCheckLapackInfo("trsen",info);
297:   *k = mout;
298:   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&T));
299:   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q));
300:   PetscFunctionReturn(PETSC_SUCCESS);
301: }

303: static PetscErrorCode DSSort_NHEP(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
304: {
305:   PetscFunctionBegin;
306:   if (!rr || wr == rr) PetscCall(DSSort_NHEP_Total(ds,DS_MAT_A,DS_MAT_Q,wr,wi));
307:   else PetscCall(DSSort_NHEP_Arbitrary(ds,wr,wi,rr,ri,k));
308:   PetscFunctionReturn(PETSC_SUCCESS);
309: }

311: static PetscErrorCode DSSortWithPermutation_NHEP(DS ds,PetscInt *perm,PetscScalar *wr,PetscScalar *wi)
312: {
313:   PetscFunctionBegin;
314:   PetscCall(DSSortWithPermutation_NHEP_Private(ds,perm,DS_MAT_A,DS_MAT_Q,wr,wi));
315:   PetscFunctionReturn(PETSC_SUCCESS);
316: }

318: static PetscErrorCode DSUpdateExtraRow_NHEP(DS ds)
319: {
320:   PetscInt          i;
321:   PetscBLASInt      n,ld,incx=1;
322:   PetscScalar       *A,*x,*y,one=1.0,zero=0.0;
323:   const PetscScalar *Q;

325:   PetscFunctionBegin;
326:   PetscCall(PetscBLASIntCast(ds->n,&n));
327:   PetscCall(PetscBLASIntCast(ds->ld,&ld));
328:   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
329:   PetscCall(MatDenseGetArrayRead(ds->omat[DS_MAT_Q],&Q));
330:   PetscCall(DSAllocateWork_Private(ds,2*ld,0,0));
331:   x = ds->work;
332:   y = ds->work+ld;
333:   for (i=0;i<n;i++) x[i] = PetscConj(A[n+i*ld]);
334:   PetscCallBLAS("BLASgemv",BLASgemv_("C",&n,&n,&one,Q,&ld,x,&incx,&zero,y,&incx));
335:   for (i=0;i<n;i++) A[n+i*ld] = PetscConj(y[i]);
336:   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
337:   PetscCall(MatDenseRestoreArrayRead(ds->omat[DS_MAT_Q],&Q));
338:   ds->k = n;
339:   PetscFunctionReturn(PETSC_SUCCESS);
340: }

342: static PetscErrorCode DSSolve_NHEP(DS ds,PetscScalar *wr,PetscScalar *wi)
343: {
344:   PetscFunctionBegin;
345: #if !defined(PETSC_USE_COMPLEX)
346:   PetscAssertPointer(wi,3);
347: #endif
348:   PetscCall(DSSolve_NHEP_Private(ds,DS_MAT_A,DS_MAT_Q,wr,wi));
349:   PetscFunctionReturn(PETSC_SUCCESS);
350: }

352: #if !defined(PETSC_HAVE_MPIUNI)
353: static PetscErrorCode DSSynchronize_NHEP(DS ds,PetscScalar eigr[],PetscScalar eigi[])
354: {
355:   PetscInt       ld=ds->ld,l=ds->l,k;
356:   PetscMPIInt    n,rank,off=0,size,ldn;
357:   PetscScalar    *A,*Q;

359:   PetscFunctionBegin;
360:   k = (ds->n-l)*ld;
361:   if (ds->state>DS_STATE_RAW) k += (ds->n-l)*ld;
362:   if (eigr) k += ds->n-l;
363:   if (eigi) k += ds->n-l;
364:   PetscCall(DSAllocateWork_Private(ds,k,0,0));
365:   PetscCall(PetscMPIIntCast(k*sizeof(PetscScalar),&size));
366:   PetscCall(PetscMPIIntCast(ds->n-l,&n));
367:   PetscCall(PetscMPIIntCast(ld*(ds->n-l),&ldn));
368:   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
369:   if (ds->state>DS_STATE_RAW) PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Q],&Q));
370:   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)ds),&rank));
371:   if (!rank) {
372:     PetscCallMPI(MPI_Pack(A+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds)));
373:     if (ds->state>DS_STATE_RAW) PetscCallMPI(MPI_Pack(Q+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds)));
374:     if (eigr) PetscCallMPI(MPI_Pack(eigr+l,n,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds)));
375: #if !defined(PETSC_USE_COMPLEX)
376:     if (eigi) PetscCallMPI(MPI_Pack(eigi+l,n,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds)));
377: #endif
378:   }
379:   PetscCallMPI(MPI_Bcast(ds->work,size,MPI_BYTE,0,PetscObjectComm((PetscObject)ds)));
380:   if (rank) {
381:     PetscCallMPI(MPI_Unpack(ds->work,size,&off,A+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds)));
382:     if (ds->state>DS_STATE_RAW) PetscCallMPI(MPI_Unpack(ds->work,size,&off,Q+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds)));
383:     if (eigr) PetscCallMPI(MPI_Unpack(ds->work,size,&off,eigr+l,n,MPIU_SCALAR,PetscObjectComm((PetscObject)ds)));
384: #if !defined(PETSC_USE_COMPLEX)
385:     if (eigi) PetscCallMPI(MPI_Unpack(ds->work,size,&off,eigi+l,n,MPIU_SCALAR,PetscObjectComm((PetscObject)ds)));
386: #endif
387:   }
388:   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
389:   if (ds->state>DS_STATE_RAW) PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q));
390:   PetscFunctionReturn(PETSC_SUCCESS);
391: }
392: #endif

394: static PetscErrorCode DSTruncate_NHEP(DS ds,PetscInt n,PetscBool trim)
395: {
396:   PetscInt    i,ld=ds->ld,l=ds->l;
397:   PetscScalar *A;

399:   PetscFunctionBegin;
400:   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
401: #if defined(PETSC_USE_DEBUG)
402:   /* make sure diagonal 2x2 block is not broken */
403:   PetscCheck(ds->state<DS_STATE_CONDENSED || n==0 || n==ds->n || A[n+(n-1)*ld]==0.0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"The given size would break a 2x2 block, call DSGetTruncateSize() first");
404: #endif
405:   if (trim) {
406:     if (ds->extrarow) {   /* clean extra row */
407:       for (i=l;i<ds->n;i++) A[ds->n+i*ld] = 0.0;
408:     }
409:     ds->l = 0;
410:     ds->k = 0;
411:     ds->n = n;
412:     ds->t = ds->n;   /* truncated length equal to the new dimension */
413:   } else {
414:     if (ds->extrarow && ds->k==ds->n) {
415:       /* copy entries of extra row to the new position, then clean last row */
416:       for (i=l;i<n;i++) A[n+i*ld] = A[ds->n+i*ld];
417:       for (i=l;i<ds->n;i++) A[ds->n+i*ld] = 0.0;
418:     }
419:     ds->k = (ds->extrarow)? n: 0;
420:     ds->t = ds->n;   /* truncated length equal to previous dimension */
421:     ds->n = n;
422:   }
423:   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
424:   PetscFunctionReturn(PETSC_SUCCESS);
425: }

427: static PetscErrorCode DSCond_NHEP(DS ds,PetscReal *cond)
428: {
429:   PetscScalar    *work;
430:   PetscReal      *rwork;
431:   PetscBLASInt   *ipiv;
432:   PetscBLASInt   lwork,info,n,ld;
433:   PetscReal      hn,hin;
434:   PetscScalar    *A;

436:   PetscFunctionBegin;
437:   PetscCall(PetscBLASIntCast(ds->n,&n));
438:   PetscCall(PetscBLASIntCast(ds->ld,&ld));
439:   lwork = 8*ld;
440:   PetscCall(DSAllocateWork_Private(ds,lwork,ld,ld));
441:   work  = ds->work;
442:   rwork = ds->rwork;
443:   ipiv  = ds->iwork;

445:   /* use workspace matrix W to avoid overwriting A */
446:   PetscCall(DSAllocateMat_Private(ds,DS_MAT_W));
447:   PetscCall(MatCopy(ds->omat[DS_MAT_A],ds->omat[DS_MAT_W],SAME_NONZERO_PATTERN));
448:   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_W],&A));

450:   /* norm of A */
451:   if (ds->state<DS_STATE_INTERMEDIATE) hn = LAPACKlange_("I",&n,&n,A,&ld,rwork);
452:   else hn = LAPACKlanhs_("I",&n,A,&ld,rwork);

454:   /* norm of inv(A) */
455:   PetscCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,A,&ld,ipiv,&info));
456:   SlepcCheckLapackInfo("getrf",info);
457:   PetscCallBLAS("LAPACKgetri",LAPACKgetri_(&n,A,&ld,ipiv,work,&lwork,&info));
458:   SlepcCheckLapackInfo("getri",info);
459:   hin = LAPACKlange_("I",&n,&n,A,&ld,rwork);
460:   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_W],&A));

462:   *cond = hn*hin;
463:   PetscFunctionReturn(PETSC_SUCCESS);
464: }

466: static PetscErrorCode DSTranslateHarmonic_NHEP(DS ds,PetscScalar tau,PetscReal beta,PetscBool recover,PetscScalar *gin,PetscReal *gammaout)
467: {
468:   PetscInt          i,j;
469:   PetscBLASInt      *ipiv,info,n,ld,one=1,ncol;
470:   PetscScalar       *A,*B,*g=gin,*ghat,done=1.0,dmone=-1.0,dzero=0.0;
471:   const PetscScalar *Q;
472:   PetscReal         gamma=1.0;

474:   PetscFunctionBegin;
475:   PetscCall(PetscBLASIntCast(ds->n,&n));
476:   PetscCall(PetscBLASIntCast(ds->ld,&ld));
477:   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));

479:   if (!recover) {

481:     PetscCall(DSAllocateWork_Private(ds,0,0,ld));
482:     ipiv = ds->iwork;
483:     if (!g) {
484:       PetscCall(DSAllocateWork_Private(ds,ld,0,0));
485:       g = ds->work;
486:     }
487:     /* use workspace matrix W to factor A-tau*eye(n) */
488:     PetscCall(DSAllocateMat_Private(ds,DS_MAT_W));
489:     PetscCall(MatCopy(ds->omat[DS_MAT_A],ds->omat[DS_MAT_W],SAME_NONZERO_PATTERN));
490:     PetscCall(MatDenseGetArray(ds->omat[DS_MAT_W],&B));

492:     /* Vector g initially stores b = beta*e_n^T */
493:     PetscCall(PetscArrayzero(g,n));
494:     g[n-1] = beta;

496:     /* g = (A-tau*eye(n))'\b */
497:     for (i=0;i<n;i++) B[i+i*ld] -= tau;
498:     PetscCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,B,&ld,ipiv,&info));
499:     SlepcCheckLapackInfo("getrf",info);
500:     PetscCall(PetscLogFlops(2.0*n*n*n/3.0));
501:     PetscCallBLAS("LAPACKgetrs",LAPACKgetrs_("C",&n,&one,B,&ld,ipiv,g,&ld,&info));
502:     SlepcCheckLapackInfo("getrs",info);
503:     PetscCall(PetscLogFlops(2.0*n*n-n));
504:     PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_W],&B));

506:     /* A = A + g*b' */
507:     for (i=0;i<n;i++) A[i+(n-1)*ld] += g[i]*beta;

509:   } else { /* recover */

511:     PetscCall(DSAllocateWork_Private(ds,ld,0,0));
512:     ghat = ds->work;
513:     PetscCall(MatDenseGetArrayRead(ds->omat[DS_MAT_Q],&Q));

515:     /* g^ = -Q(:,idx)'*g */
516:     PetscCall(PetscBLASIntCast(ds->l+ds->k,&ncol));
517:     PetscCallBLAS("BLASgemv",BLASgemv_("C",&n,&ncol,&dmone,Q,&ld,g,&one,&dzero,ghat,&one));

519:     /* A = A + g^*b' */
520:     for (i=0;i<ds->l+ds->k;i++)
521:       for (j=ds->l;j<ds->l+ds->k;j++)
522:         A[i+j*ld] += ghat[i]*Q[n-1+j*ld]*beta;

524:     /* g~ = (I-Q(:,idx)*Q(:,idx)')*g = g+Q(:,idx)*g^ */
525:     PetscCallBLAS("BLASgemv",BLASgemv_("N",&n,&ncol,&done,Q,&ld,ghat,&one,&done,g,&one));
526:     PetscCall(MatDenseRestoreArrayRead(ds->omat[DS_MAT_Q],&Q));
527:   }

529:   /* Compute gamma factor */
530:   if (gammaout || (recover && ds->extrarow)) gamma = SlepcAbs(1.0,BLASnrm2_(&n,g,&one));
531:   if (gammaout) *gammaout = gamma;
532:   if (recover && ds->extrarow) {
533:     for (j=ds->l;j<ds->l+ds->k;j++) A[ds->n+j*ld] *= gamma;
534:   }
535:   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
536:   PetscFunctionReturn(PETSC_SUCCESS);
537: }

539: static PetscErrorCode DSReallocate_NHEP(DS ds,PetscInt ld)
540: {
541:   PetscInt i,*perm=ds->perm;

543:   PetscFunctionBegin;
544:   for (i=0;i<DS_NUM_MAT;i++) {
545:     if (i!=DS_MAT_A && i!=DS_MAT_Q) PetscCall(MatDestroy(&ds->omat[i]));
546:   }

548:   PetscCall(DSReallocateMat_Private(ds,DS_MAT_A,ld));
549:   PetscCall(DSReallocateMat_Private(ds,DS_MAT_Q,ld));

551:   PetscCall(PetscMalloc1(ld,&ds->perm));
552:   PetscCall(PetscArraycpy(ds->perm,perm,ds->ld));
553:   PetscCall(PetscFree(perm));
554:   PetscFunctionReturn(PETSC_SUCCESS);
555: }

557: /*MC
558:    DSNHEP - Dense Non-Hermitian Eigenvalue Problem.

560:    Level: beginner

562:    Notes:
563:    The problem is expressed as A*X = X*Lambda, where A is the input matrix.
564:    Lambda is a diagonal matrix whose diagonal elements are the arguments of
565:    DSSolve(). After solve, A is overwritten with the upper quasi-triangular
566:    matrix T of the (real) Schur form, A*Q = Q*T.

568:    In the intermediate state A is reduced to upper Hessenberg form.

570:    Computation of left eigenvectors is supported, but two-sided Krylov solvers
571:    usually rely on the related DSNHEPTS.

573:    Used DS matrices:
574: +  DS_MAT_A - problem matrix
575: -  DS_MAT_Q - orthogonal/unitary transformation that reduces to Hessenberg form
576:    (intermediate step) or matrix of orthogonal Schur vectors

578:    Implemented methods:
579: .  0 - Implicit QR (_hseqr)

581: .seealso: DSCreate(), DSSetType(), DSType
582: M*/
583: SLEPC_EXTERN PetscErrorCode DSCreate_NHEP(DS ds)
584: {
585:   PetscFunctionBegin;
586:   ds->ops->allocate        = DSAllocate_NHEP;
587:   ds->ops->view            = DSView_NHEP;
588:   ds->ops->vectors         = DSVectors_NHEP;
589:   ds->ops->solve[0]        = DSSolve_NHEP;
590:   ds->ops->sort            = DSSort_NHEP;
591:   ds->ops->sortperm        = DSSortWithPermutation_NHEP;
592: #if !defined(PETSC_HAVE_MPIUNI)
593:   ds->ops->synchronize     = DSSynchronize_NHEP;
594: #endif
595:   ds->ops->gettruncatesize = DSGetTruncateSize_Default;
596:   ds->ops->truncate        = DSTruncate_NHEP;
597:   ds->ops->update          = DSUpdateExtraRow_NHEP;
598:   ds->ops->cond            = DSCond_NHEP;
599:   ds->ops->transharm       = DSTranslateHarmonic_NHEP;
600:   ds->ops->reallocate      = DSReallocate_NHEP;
601:   PetscFunctionReturn(PETSC_SUCCESS);
602: }