Actual source code: dshep.c
slepc-3.22.1 2024-10-28
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
11: #include <slepc/private/dsimpl.h>
12: #include <slepcblaslapack.h>
14: static PetscErrorCode DSAllocate_HEP(DS ds,PetscInt ld)
15: {
16: PetscFunctionBegin;
17: if (!ds->compact) PetscCall(DSAllocateMat_Private(ds,DS_MAT_A));
18: PetscCall(DSAllocateMat_Private(ds,DS_MAT_Q));
19: PetscCall(DSAllocateMat_Private(ds,DS_MAT_T));
20: PetscCall(PetscFree(ds->perm));
21: PetscCall(PetscMalloc1(ld,&ds->perm));
22: PetscFunctionReturn(PETSC_SUCCESS);
23: }
25: /* 0 l k n-1
26: -----------------------------------------
27: |* . . |
28: | * . . |
29: | * . . |
30: | * . . |
31: |. . . . o o |
32: | o o |
33: | o o |
34: | o o |
35: | o o |
36: | o o |
37: |. . . . o o o o o o o x |
38: | x x x |
39: | x x x |
40: | x x x |
41: | x x x |
42: | x x x |
43: | x x x |
44: | x x x |
45: | x x x|
46: | x x|
47: -----------------------------------------
48: */
50: static PetscErrorCode DSSwitchFormat_HEP(DS ds)
51: {
52: PetscReal *T;
53: PetscScalar *A;
54: PetscInt i,n=ds->n,k=ds->k,ld=ds->ld;
56: PetscFunctionBegin;
57: /* switch from compact (arrow) to dense storage */
58: PetscCall(MatDenseGetArrayWrite(ds->omat[DS_MAT_A],&A));
59: PetscCall(DSGetArrayReal(ds,DS_MAT_T,&T));
60: PetscCall(PetscArrayzero(A,ld*ld));
61: for (i=0;i<k;i++) {
62: A[i+i*ld] = T[i];
63: A[k+i*ld] = T[i+ld];
64: A[i+k*ld] = T[i+ld];
65: }
66: A[k+k*ld] = T[k];
67: for (i=k+1;i<n;i++) {
68: A[i+i*ld] = T[i];
69: A[i-1+i*ld] = T[i-1+ld];
70: A[i+(i-1)*ld] = T[i-1+ld];
71: }
72: if (ds->extrarow) A[n+(n-1)*ld] = T[n-1+ld];
73: PetscCall(MatDenseRestoreArrayWrite(ds->omat[DS_MAT_A],&A));
74: PetscCall(DSRestoreArrayReal(ds,DS_MAT_T,&T));
75: PetscFunctionReturn(PETSC_SUCCESS);
76: }
78: static PetscErrorCode DSView_HEP(DS ds,PetscViewer viewer)
79: {
80: PetscViewerFormat format;
81: PetscInt i,j,r,c,rows;
82: PetscReal *T,value;
83: const char *methodname[] = {
84: "Implicit QR method (_steqr)",
85: "Relatively Robust Representations (_stevr)",
86: "Divide and Conquer method (_stedc)",
87: "Block Divide and Conquer method (dsbtdc)"
88: };
89: const int nmeth=PETSC_STATIC_ARRAY_LENGTH(methodname);
91: PetscFunctionBegin;
92: PetscCall(PetscViewerGetFormat(viewer,&format));
93: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
94: if (ds->bs>1) PetscCall(PetscViewerASCIIPrintf(viewer,"block size: %" PetscInt_FMT "\n",ds->bs));
95: if (ds->method<nmeth) PetscCall(PetscViewerASCIIPrintf(viewer,"solving the problem with: %s\n",methodname[ds->method]));
96: PetscFunctionReturn(PETSC_SUCCESS);
97: }
98: if (ds->compact) {
99: PetscCall(DSGetArrayReal(ds,DS_MAT_T,&T));
100: PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_FALSE));
101: rows = ds->extrarow? ds->n+1: ds->n;
102: if (format == PETSC_VIEWER_ASCII_MATLAB) {
103: PetscCall(PetscViewerASCIIPrintf(viewer,"%% Size = %" PetscInt_FMT " %" PetscInt_FMT "\n",rows,ds->n));
104: PetscCall(PetscViewerASCIIPrintf(viewer,"zzz = zeros(%" PetscInt_FMT ",3);\n",3*ds->n));
105: PetscCall(PetscViewerASCIIPrintf(viewer,"zzz = [\n"));
106: for (i=0;i<ds->n;i++) PetscCall(PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT " %18.16e\n",i+1,i+1,(double)T[i]));
107: for (i=0;i<rows-1;i++) {
108: r = PetscMax(i+2,ds->k+1);
109: c = i+1;
110: PetscCall(PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT " %18.16e\n",r,c,(double)T[i+ds->ld]));
111: if (i<ds->n-1 && ds->k<ds->n) { /* do not print vertical arrow when k=n */
112: PetscCall(PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT " %18.16e\n",c,r,(double)T[i+ds->ld]));
113: }
114: }
115: PetscCall(PetscViewerASCIIPrintf(viewer,"];\n%s = spconvert(zzz);\n",DSMatName[DS_MAT_T]));
116: } else {
117: for (i=0;i<rows;i++) {
118: for (j=0;j<ds->n;j++) {
119: if (i==j) value = T[i];
120: else if ((i<ds->k && j==ds->k) || (i==ds->k && j<ds->k)) value = T[PetscMin(i,j)+ds->ld];
121: else if (i==j+1 && i>ds->k) value = T[i-1+ds->ld];
122: else if (i+1==j && j>ds->k) value = T[j-1+ds->ld];
123: else value = 0.0;
124: PetscCall(PetscViewerASCIIPrintf(viewer," %18.16e ",(double)value));
125: }
126: PetscCall(PetscViewerASCIIPrintf(viewer,"\n"));
127: }
128: }
129: PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_TRUE));
130: PetscCall(PetscViewerFlush(viewer));
131: PetscCall(DSRestoreArrayReal(ds,DS_MAT_T,&T));
132: } else PetscCall(DSViewMat(ds,viewer,DS_MAT_A));
133: if (ds->state>DS_STATE_INTERMEDIATE) PetscCall(DSViewMat(ds,viewer,DS_MAT_Q));
134: PetscFunctionReturn(PETSC_SUCCESS);
135: }
137: static PetscErrorCode DSVectors_HEP(DS ds,DSMatType mat,PetscInt *j,PetscReal *rnorm)
138: {
139: PetscScalar *Z;
140: const PetscScalar *Q;
141: PetscInt ld = ds->ld;
143: PetscFunctionBegin;
144: switch (mat) {
145: case DS_MAT_X:
146: case DS_MAT_Y:
147: if (j) {
148: PetscCall(MatDenseGetArray(ds->omat[mat],&Z));
149: if (ds->state>=DS_STATE_CONDENSED) {
150: PetscCall(MatDenseGetArrayRead(ds->omat[DS_MAT_Q],&Q));
151: PetscCall(PetscArraycpy(Z+(*j)*ld,Q+(*j)*ld,ld));
152: if (rnorm) *rnorm = PetscAbsScalar(Q[ds->n-1+(*j)*ld]);
153: PetscCall(MatDenseRestoreArrayRead(ds->omat[DS_MAT_Q],&Q));
154: } else {
155: PetscCall(PetscArrayzero(Z+(*j)*ld,ld));
156: Z[(*j)+(*j)*ld] = 1.0;
157: if (rnorm) *rnorm = 0.0;
158: }
159: PetscCall(MatDenseRestoreArray(ds->omat[mat],&Z));
160: } else {
161: if (ds->state>=DS_STATE_CONDENSED) PetscCall(MatCopy(ds->omat[DS_MAT_Q],ds->omat[mat],SAME_NONZERO_PATTERN));
162: else PetscCall(DSSetIdentity(ds,mat));
163: }
164: break;
165: case DS_MAT_U:
166: case DS_MAT_V:
167: SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Not implemented yet");
168: default:
169: SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
170: }
171: PetscFunctionReturn(PETSC_SUCCESS);
172: }
174: /*
175: ARROWTRIDIAG reduces a symmetric arrowhead matrix of the form
177: [ d 0 0 0 e ]
178: [ 0 d 0 0 e ]
179: A = [ 0 0 d 0 e ]
180: [ 0 0 0 d e ]
181: [ e e e e d ]
183: to tridiagonal form
185: [ d e 0 0 0 ]
186: [ e d e 0 0 ]
187: T = Q'*A*Q = [ 0 e d e 0 ],
188: [ 0 0 e d e ]
189: [ 0 0 0 e d ]
191: where Q is an orthogonal matrix. Rutishauser's algorithm is used to
192: perform the reduction, which requires O(n**2) flops. The accumulation
193: of the orthogonal factor Q, however, requires O(n**3) flops.
195: Arguments
196: =========
198: N (input) INTEGER
199: The order of the matrix A. N >= 0.
201: D (input/output) DOUBLE PRECISION array, dimension (N)
202: On entry, the diagonal entries of the matrix A to be
203: reduced.
204: On exit, the diagonal entries of the reduced matrix T.
206: E (input/output) DOUBLE PRECISION array, dimension (N-1)
207: On entry, the off-diagonal entries of the matrix A to be
208: reduced.
209: On exit, the subdiagonal entries of the reduced matrix T.
211: Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N)
212: On exit, the orthogonal matrix Q.
214: LDQ (input) INTEGER
215: The leading dimension of the array Q.
217: Note
218: ====
219: Based on Fortran code contributed by Daniel Kressner
220: */
221: PetscErrorCode DSArrowTridiag(PetscBLASInt n,PetscReal *d,PetscReal *e,PetscScalar *Q,PetscBLASInt ld)
222: {
223: PetscBLASInt i,j,j2,one=1;
224: PetscReal c,s,p,off,temp;
226: PetscFunctionBegin;
227: if (n<=2) PetscFunctionReturn(PETSC_SUCCESS);
229: for (j=0;j<n-2;j++) {
231: /* Eliminate entry e(j) by a rotation in the planes (j,j+1) */
232: temp = e[j+1];
233: PetscCallBLAS("LAPACKlartg",LAPACKREALlartg_(&temp,&e[j],&c,&s,&e[j+1]));
234: s = -s;
236: /* Apply rotation to diagonal elements */
237: temp = d[j+1];
238: e[j] = c*s*(temp-d[j]);
239: d[j+1] = s*s*d[j] + c*c*temp;
240: d[j] = c*c*d[j] + s*s*temp;
242: /* Apply rotation to Q */
243: j2 = j+2;
244: PetscCallBLAS("BLASrot",BLASMIXEDrot_(&j2,Q+j*ld,&one,Q+(j+1)*ld,&one,&c,&s));
246: /* Chase newly introduced off-diagonal entry to the top left corner */
247: for (i=j-1;i>=0;i--) {
248: off = -s*e[i];
249: e[i] = c*e[i];
250: temp = e[i+1];
251: PetscCallBLAS("LAPACKlartg",LAPACKREALlartg_(&temp,&off,&c,&s,&e[i+1]));
252: s = -s;
253: temp = (d[i]-d[i+1])*s - 2.0*c*e[i];
254: p = s*temp;
255: d[i+1] += p;
256: d[i] -= p;
257: e[i] = -e[i] - c*temp;
258: PetscCallBLAS("BLASrot",BLASMIXEDrot_(&j2,Q+i*ld,&one,Q+(i+1)*ld,&one,&c,&s));
259: }
260: }
261: PetscFunctionReturn(PETSC_SUCCESS);
262: }
264: /*
265: Reduce to tridiagonal form by means of DSArrowTridiag.
266: */
267: static PetscErrorCode DSIntermediate_HEP(DS ds)
268: {
269: PetscInt i;
270: PetscBLASInt n1 = 0,n2,lwork,info,l = 0,n = 0,ld,off;
271: PetscScalar *Q,*work,*tau;
272: const PetscScalar *A;
273: PetscReal *d,*e;
274: Mat At,Qt; /* trailing submatrices */
276: PetscFunctionBegin;
277: PetscCall(PetscBLASIntCast(ds->n,&n));
278: PetscCall(PetscBLASIntCast(ds->l,&l));
279: PetscCall(PetscBLASIntCast(ds->ld,&ld));
280: PetscCall(PetscBLASIntCast(PetscMax(0,ds->k-l+1),&n1)); /* size of leading block, excl. locked */
281: n2 = n-l; /* n2 = n1 + size of trailing block */
282: off = l+l*ld;
283: PetscCall(DSGetArrayReal(ds,DS_MAT_T,&d));
284: e = d+ld;
285: PetscCall(DSSetIdentity(ds,DS_MAT_Q));
286: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Q],&Q));
288: if (ds->compact) {
290: if (ds->state<DS_STATE_INTERMEDIATE) PetscCall(DSArrowTridiag(n1,d+l,e+l,Q+off,ld));
292: } else {
294: PetscCall(MatDenseGetArrayRead(ds->omat[DS_MAT_A],&A));
295: for (i=0;i<l;i++) { d[i] = PetscRealPart(A[i+i*ld]); e[i] = 0.0; }
297: if (ds->state<DS_STATE_INTERMEDIATE) {
298: PetscCall(MatDenseGetSubMatrix(ds->omat[DS_MAT_A],ds->l,ds->n,ds->l,ds->n,&At));
299: PetscCall(MatDenseGetSubMatrix(ds->omat[DS_MAT_Q],ds->l,ds->n,ds->l,ds->n,&Qt));
300: PetscCall(MatCopy(At,Qt,SAME_NONZERO_PATTERN));
301: PetscCall(MatDenseRestoreSubMatrix(ds->omat[DS_MAT_A],&At));
302: PetscCall(MatDenseRestoreSubMatrix(ds->omat[DS_MAT_Q],&Qt));
303: PetscCall(DSAllocateWork_Private(ds,ld+ld*ld,0,0));
304: tau = ds->work;
305: work = ds->work+ld;
306: lwork = ld*ld;
307: PetscCallBLAS("LAPACKsytrd",LAPACKsytrd_("L",&n2,Q+off,&ld,d+l,e+l,tau,work,&lwork,&info));
308: SlepcCheckLapackInfo("sytrd",info);
309: PetscCallBLAS("LAPACKorgtr",LAPACKorgtr_("L",&n2,Q+off,&ld,tau,work,&lwork,&info));
310: SlepcCheckLapackInfo("orgtr",info);
311: } else {
312: /* copy tridiagonal to d,e */
313: for (i=l;i<n;i++) d[i] = PetscRealPart(A[i+i*ld]);
314: for (i=l;i<n-1;i++) e[i] = PetscRealPart(A[(i+1)+i*ld]);
315: }
316: PetscCall(MatDenseRestoreArrayRead(ds->omat[DS_MAT_A],&A));
317: }
318: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q));
319: PetscCall(DSRestoreArrayReal(ds,DS_MAT_T,&d));
320: PetscFunctionReturn(PETSC_SUCCESS);
321: }
323: static PetscErrorCode DSSort_HEP(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
324: {
325: PetscInt n,l,i,*perm,ld=ds->ld;
326: PetscScalar *A;
327: PetscReal *d;
329: PetscFunctionBegin;
330: if (!ds->sc) PetscFunctionReturn(PETSC_SUCCESS);
331: n = ds->n;
332: l = ds->l;
333: PetscCall(DSGetArrayReal(ds,DS_MAT_T,&d));
334: perm = ds->perm;
335: if (!rr) PetscCall(DSSortEigenvaluesReal_Private(ds,d,perm));
336: else PetscCall(DSSortEigenvalues_Private(ds,rr,ri,perm,PETSC_FALSE));
337: for (i=l;i<n;i++) wr[i] = d[perm[i]];
338: PetscCall(DSPermuteColumns_Private(ds,l,n,n,DS_MAT_Q,perm));
339: for (i=l;i<n;i++) d[i] = PetscRealPart(wr[i]);
340: if (!ds->compact) {
341: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
342: for (i=l;i<n;i++) A[i+i*ld] = wr[i];
343: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
344: }
345: PetscCall(DSRestoreArrayReal(ds,DS_MAT_T,&d));
346: PetscFunctionReturn(PETSC_SUCCESS);
347: }
349: static PetscErrorCode DSUpdateExtraRow_HEP(DS ds)
350: {
351: PetscInt i;
352: PetscBLASInt n,ld,incx=1;
353: PetscScalar *A,*x,*y,one=1.0,zero=0.0;
354: PetscReal *T,*e,beta;
355: const PetscScalar *Q;
357: PetscFunctionBegin;
358: PetscCall(PetscBLASIntCast(ds->n,&n));
359: PetscCall(PetscBLASIntCast(ds->ld,&ld));
360: PetscCall(MatDenseGetArrayRead(ds->omat[DS_MAT_Q],&Q));
361: if (ds->compact) {
362: PetscCall(DSGetArrayReal(ds,DS_MAT_T,&T));
363: e = T+ld;
364: beta = e[n-1]; /* in compact, we assume all entries are zero except the last one */
365: for (i=0;i<n;i++) e[i] = PetscRealPart(beta*Q[n-1+i*ld]);
366: PetscCall(DSRestoreArrayReal(ds,DS_MAT_T,&T));
367: ds->k = n;
368: } else {
369: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
370: PetscCall(DSAllocateWork_Private(ds,2*ld,0,0));
371: x = ds->work;
372: y = ds->work+ld;
373: for (i=0;i<n;i++) x[i] = PetscConj(A[n+i*ld]);
374: PetscCallBLAS("BLASgemv",BLASgemv_("C",&n,&n,&one,Q,&ld,x,&incx,&zero,y,&incx));
375: for (i=0;i<n;i++) A[n+i*ld] = PetscConj(y[i]);
376: ds->k = n;
377: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
378: }
379: PetscCall(MatDenseRestoreArrayRead(ds->omat[DS_MAT_Q],&Q));
380: PetscFunctionReturn(PETSC_SUCCESS);
381: }
383: static PetscErrorCode DSSolve_HEP_QR(DS ds,PetscScalar *wr,PetscScalar *wi)
384: {
385: PetscInt i;
386: PetscBLASInt n1,info,l = 0,n = 0,ld,off;
387: PetscScalar *Q,*A;
388: PetscReal *d,*e;
390: PetscFunctionBegin;
391: PetscCheck(ds->bs==1,PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"This method is not prepared for bs>1");
392: PetscCall(PetscBLASIntCast(ds->n,&n));
393: PetscCall(PetscBLASIntCast(ds->l,&l));
394: PetscCall(PetscBLASIntCast(ds->ld,&ld));
395: n1 = n-l; /* n1 = size of leading block, excl. locked + size of trailing block */
396: off = l+l*ld;
397: PetscCall(DSGetArrayReal(ds,DS_MAT_T,&d));
398: e = d+ld;
400: /* Reduce to tridiagonal form */
401: PetscCall(DSIntermediate_HEP(ds));
403: /* Solve the tridiagonal eigenproblem */
404: for (i=0;i<l;i++) wr[i] = d[i];
406: PetscCall(DSAllocateWork_Private(ds,0,2*ld,0));
407: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Q],&Q));
408: PetscCallBLAS("LAPACKsteqr",LAPACKsteqr_("V",&n1,d+l,e+l,Q+off,&ld,ds->rwork,&info));
409: SlepcCheckLapackInfo("steqr",info);
410: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q));
411: for (i=l;i<n;i++) wr[i] = d[i];
413: /* Create diagonal matrix as a result */
414: if (ds->compact) PetscCall(PetscArrayzero(e,n-1));
415: else {
416: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
417: for (i=l;i<n;i++) PetscCall(PetscArrayzero(A+l+i*ld,n-l));
418: for (i=l;i<n;i++) A[i+i*ld] = d[i];
419: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
420: }
421: PetscCall(DSRestoreArrayReal(ds,DS_MAT_T,&d));
423: /* Set zero wi */
424: if (wi) for (i=l;i<n;i++) wi[i] = 0.0;
425: PetscFunctionReturn(PETSC_SUCCESS);
426: }
428: static PetscErrorCode DSSolve_HEP_MRRR(DS ds,PetscScalar *wr,PetscScalar *wi)
429: {
430: Mat At,Qt; /* trailing submatrices */
431: PetscInt i;
432: PetscBLASInt n1 = 0,n2 = 0,n3,lrwork,liwork,info,l = 0,n = 0,m = 0,ld,off,il,iu,*isuppz;
433: PetscScalar *A,*Q,*W=NULL,one=1.0,zero=0.0;
434: PetscReal *d,*e,abstol=0.0,vl,vu;
435: #if defined(PETSC_USE_COMPLEX)
436: PetscInt j;
437: PetscReal *Qr,*ritz;
438: #endif
440: PetscFunctionBegin;
441: PetscCheck(ds->bs==1,PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"This method is not prepared for bs>1");
442: PetscCall(PetscBLASIntCast(ds->n,&n));
443: PetscCall(PetscBLASIntCast(ds->l,&l));
444: PetscCall(PetscBLASIntCast(ds->ld,&ld));
445: PetscCall(PetscBLASIntCast(ds->k-l+1,&n1)); /* size of leading block, excl. locked */
446: PetscCall(PetscBLASIntCast(n-ds->k-1,&n2)); /* size of trailing block */
447: n3 = n1+n2;
448: off = l+l*ld;
449: PetscCall(DSGetArrayReal(ds,DS_MAT_T,&d));
450: e = d+ld;
452: /* Reduce to tridiagonal form */
453: PetscCall(DSIntermediate_HEP(ds));
455: /* Solve the tridiagonal eigenproblem */
456: for (i=0;i<l;i++) wr[i] = d[i];
458: if (ds->state<DS_STATE_INTERMEDIATE) { /* Q contains useful info */
459: PetscCall(DSAllocateMat_Private(ds,DS_MAT_W));
460: PetscCall(MatCopy(ds->omat[DS_MAT_Q],ds->omat[DS_MAT_W],SAME_NONZERO_PATTERN));
461: }
462: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Q],&Q));
463: lrwork = 20*ld;
464: liwork = 10*ld;
465: #if defined(PETSC_USE_COMPLEX)
466: PetscCall(DSAllocateWork_Private(ds,0,lrwork+ld+ld*ld,liwork+2*ld));
467: #else
468: PetscCall(DSAllocateWork_Private(ds,0,lrwork+ld,liwork+2*ld));
469: #endif
470: isuppz = ds->iwork+liwork;
471: #if defined(PETSC_USE_COMPLEX)
472: ritz = ds->rwork+lrwork;
473: Qr = ds->rwork+lrwork+ld;
474: PetscCallBLAS("LAPACKstevr",LAPACKstevr_("V","A",&n3,d+l,e+l,&vl,&vu,&il,&iu,&abstol,&m,ritz+l,Qr+off,&ld,isuppz,ds->rwork,&lrwork,ds->iwork,&liwork,&info));
475: for (i=l;i<n;i++) wr[i] = ritz[i];
476: #else
477: PetscCallBLAS("LAPACKstevr",LAPACKstevr_("V","A",&n3,d+l,e+l,&vl,&vu,&il,&iu,&abstol,&m,wr+l,Q+off,&ld,isuppz,ds->rwork,&lrwork,ds->iwork,&liwork,&info));
478: #endif
479: SlepcCheckLapackInfo("stevr",info);
480: #if defined(PETSC_USE_COMPLEX)
481: for (i=l;i<n;i++)
482: for (j=l;j<n;j++)
483: Q[i+j*ld] = Qr[i+j*ld];
484: #endif
485: if (ds->state<DS_STATE_INTERMEDIATE) { /* accumulate previous Q */
486: if (ds->compact) PetscCall(DSAllocateMat_Private(ds,DS_MAT_A));
487: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
488: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_W],&W));
489: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n3,&n3,&n3,&one,W+off,&ld,Q+off,&ld,&zero,A+off,&ld));
490: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
491: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_W],&W));
492: PetscCall(MatDenseGetSubMatrix(ds->omat[DS_MAT_A],ds->l,ds->n,ds->l,ds->n,&At));
493: PetscCall(MatDenseGetSubMatrix(ds->omat[DS_MAT_Q],ds->l,ds->n,ds->l,ds->n,&Qt));
494: PetscCall(MatCopy(At,Qt,SAME_NONZERO_PATTERN));
495: PetscCall(MatDenseRestoreSubMatrix(ds->omat[DS_MAT_A],&At));
496: PetscCall(MatDenseRestoreSubMatrix(ds->omat[DS_MAT_Q],&Qt));
497: }
498: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q));
499: for (i=l;i<n;i++) d[i] = PetscRealPart(wr[i]);
501: /* Create diagonal matrix as a result */
502: if (ds->compact) PetscCall(PetscArrayzero(e,n-1));
503: else {
504: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
505: for (i=l;i<n;i++) PetscCall(PetscArrayzero(A+l+i*ld,n-l));
506: for (i=l;i<n;i++) A[i+i*ld] = d[i];
507: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
508: }
509: PetscCall(DSRestoreArrayReal(ds,DS_MAT_T,&d));
511: /* Set zero wi */
512: if (wi) for (i=l;i<n;i++) wi[i] = 0.0;
513: PetscFunctionReturn(PETSC_SUCCESS);
514: }
516: static PetscErrorCode DSSolve_HEP_DC(DS ds,PetscScalar *wr,PetscScalar *wi)
517: {
518: PetscInt i;
519: PetscBLASInt n1,info,l = 0,ld,off,lrwork,liwork;
520: PetscScalar *Q,*A;
521: PetscReal *d,*e;
522: #if defined(PETSC_USE_COMPLEX)
523: PetscBLASInt lwork;
524: PetscInt j;
525: #endif
527: PetscFunctionBegin;
528: PetscCheck(ds->bs==1,PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"This method is not prepared for bs>1");
529: PetscCall(PetscBLASIntCast(ds->l,&l));
530: PetscCall(PetscBLASIntCast(ds->ld,&ld));
531: PetscCall(PetscBLASIntCast(ds->n-ds->l,&n1));
532: off = l+l*ld;
533: PetscCall(DSGetArrayReal(ds,DS_MAT_T,&d));
534: e = d+ld;
536: /* Reduce to tridiagonal form */
537: PetscCall(DSIntermediate_HEP(ds));
539: /* Solve the tridiagonal eigenproblem */
540: for (i=0;i<l;i++) wr[i] = d[i];
542: lrwork = 5*n1*n1+3*n1+1;
543: liwork = 5*n1*n1+6*n1+6;
544: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Q],&Q));
545: #if !defined(PETSC_USE_COMPLEX)
546: PetscCall(DSAllocateWork_Private(ds,0,lrwork,liwork));
547: PetscCallBLAS("LAPACKstedc",LAPACKstedc_("V",&n1,d+l,e+l,Q+off,&ld,ds->rwork,&lrwork,ds->iwork,&liwork,&info));
548: #else
549: lwork = ld*ld;
550: PetscCall(DSAllocateWork_Private(ds,lwork,lrwork,liwork));
551: PetscCallBLAS("LAPACKstedc",LAPACKstedc_("V",&n1,d+l,e+l,Q+off,&ld,ds->work,&lwork,ds->rwork,&lrwork,ds->iwork,&liwork,&info));
552: /* Fixing Lapack bug*/
553: for (j=ds->l;j<ds->n;j++)
554: for (i=0;i<ds->l;i++) Q[i+j*ld] = 0.0;
555: #endif
556: SlepcCheckLapackInfo("stedc",info);
557: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q));
558: for (i=l;i<ds->n;i++) wr[i] = d[i];
560: /* Create diagonal matrix as a result */
561: if (ds->compact) PetscCall(PetscArrayzero(e,ds->n-1));
562: else {
563: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
564: for (i=l;i<ds->n;i++) PetscCall(PetscArrayzero(A+l+i*ld,ds->n-l));
565: for (i=l;i<ds->n;i++) A[i+i*ld] = d[i];
566: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
567: }
568: PetscCall(DSRestoreArrayReal(ds,DS_MAT_T,&d));
570: /* Set zero wi */
571: if (wi) for (i=l;i<ds->n;i++) wi[i] = 0.0;
572: PetscFunctionReturn(PETSC_SUCCESS);
573: }
575: #if !defined(PETSC_USE_COMPLEX)
576: static PetscErrorCode DSSolve_HEP_BDC(DS ds,PetscScalar *wr,PetscScalar *wi)
577: {
578: PetscBLASInt i,j,k,m,n = 0,info,nblks,bs = 0,ld = 0,lde,lrwork,liwork,*ksizes,*iwork,mingapi;
579: PetscScalar *Q,*A;
580: PetscReal *D,*E,*d,*e,tol=PETSC_MACHINE_EPSILON/2,tau1=1e-16,tau2=1e-18,*rwork,mingap;
582: PetscFunctionBegin;
583: PetscCheck(ds->l==0,PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"This method is not prepared for l>1");
584: PetscCheck(!ds->compact,PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Not implemented for compact storage");
585: PetscCall(PetscBLASIntCast(ds->ld,&ld));
586: PetscCall(PetscBLASIntCast(ds->bs,&bs));
587: PetscCall(PetscBLASIntCast(ds->n,&n));
588: nblks = n/bs;
589: PetscCall(DSGetArrayReal(ds,DS_MAT_T,&d));
590: e = d+ld;
591: lrwork = 4*n*n+60*n+1;
592: liwork = 5*n+5*nblks-1;
593: lde = 2*bs+1;
594: PetscCall(DSAllocateWork_Private(ds,bs*n+lde*lde*(nblks-1),lrwork,nblks+liwork));
595: D = ds->work;
596: E = ds->work+bs*n;
597: rwork = ds->rwork;
598: ksizes = ds->iwork;
599: iwork = ds->iwork+nblks;
600: PetscCall(PetscArrayzero(iwork,liwork));
602: /* Copy matrix to block tridiagonal format */
603: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
604: j=0;
605: for (i=0;i<nblks;i++) {
606: ksizes[i]=bs;
607: for (k=0;k<bs;k++)
608: for (m=0;m<bs;m++)
609: D[k+m*bs+i*bs*bs] = PetscRealPart(A[j+k+(j+m)*n]);
610: j = j + bs;
611: }
612: j=0;
613: for (i=0;i<nblks-1;i++) {
614: for (k=0;k<bs;k++)
615: for (m=0;m<bs;m++)
616: E[k+m*lde+i*lde*lde] = PetscRealPart(A[j+bs+k+(j+m)*n]);
617: j = j + bs;
618: }
619: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
621: /* Solve the block tridiagonal eigenproblem */
622: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Q],&Q));
623: PetscCall(BDC_dsbtdc_("D","A",n,nblks,ksizes,D,bs,bs,E,lde,lde,tol,tau1,tau2,d,Q,n,rwork,lrwork,iwork,liwork,&mingap,&mingapi,&info,1,1));
624: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q));
625: for (i=0;i<ds->n;i++) wr[i] = d[i];
627: /* Create diagonal matrix as a result */
628: if (ds->compact) PetscCall(PetscArrayzero(e,ds->n-1));
629: else {
630: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
631: for (i=0;i<ds->n;i++) PetscCall(PetscArrayzero(A+i*ld,ds->n));
632: for (i=0;i<ds->n;i++) A[i+i*ld] = wr[i];
633: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
634: }
635: PetscCall(DSRestoreArrayReal(ds,DS_MAT_T,&d));
637: /* Set zero wi */
638: if (wi) for (i=0;i<ds->n;i++) wi[i] = 0.0;
639: PetscFunctionReturn(PETSC_SUCCESS);
640: }
641: #endif
643: static PetscErrorCode DSTruncate_HEP(DS ds,PetscInt n,PetscBool trim)
644: {
645: PetscInt i,ld=ds->ld,l=ds->l;
646: PetscScalar *A;
648: PetscFunctionBegin;
649: if (!ds->compact && ds->extrarow) PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
650: if (trim) {
651: if (!ds->compact && ds->extrarow) { /* clean extra row */
652: for (i=l;i<ds->n;i++) A[ds->n+i*ld] = 0.0;
653: }
654: ds->l = 0;
655: ds->k = 0;
656: ds->n = n;
657: ds->t = ds->n; /* truncated length equal to the new dimension */
658: } else {
659: if (!ds->compact && ds->extrarow && ds->k==ds->n) {
660: /* copy entries of extra row to the new position, then clean last row */
661: for (i=l;i<n;i++) A[n+i*ld] = A[ds->n+i*ld];
662: for (i=l;i<ds->n;i++) A[ds->n+i*ld] = 0.0;
663: }
664: ds->k = (ds->extrarow)? n: 0;
665: ds->t = ds->n; /* truncated length equal to previous dimension */
666: ds->n = n;
667: }
668: if (!ds->compact && ds->extrarow) PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
669: PetscFunctionReturn(PETSC_SUCCESS);
670: }
672: #if !defined(PETSC_HAVE_MPIUNI)
673: static PetscErrorCode DSSynchronize_HEP(DS ds,PetscScalar eigr[],PetscScalar eigi[])
674: {
675: PetscInt ld=ds->ld,l=ds->l,k=0,kr=0;
676: PetscMPIInt n,rank,off=0,size,ldn,ld3;
677: PetscScalar *A,*Q;
678: PetscReal *T;
680: PetscFunctionBegin;
681: if (ds->compact) kr = 3*ld;
682: else k = (ds->n-l)*ld;
683: if (ds->state>DS_STATE_RAW) k += (ds->n-l)*ld;
684: if (eigr) k += (ds->n-l);
685: PetscCall(DSAllocateWork_Private(ds,k+kr,0,0));
686: PetscCall(PetscMPIIntCast(k*sizeof(PetscScalar)+kr*sizeof(PetscReal),&size));
687: PetscCall(PetscMPIIntCast(ds->n-l,&n));
688: PetscCall(PetscMPIIntCast(ld*(ds->n-l),&ldn));
689: PetscCall(PetscMPIIntCast(ld*3,&ld3));
690: if (ds->compact) PetscCall(DSGetArrayReal(ds,DS_MAT_T,&T));
691: else PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
692: if (ds->state>DS_STATE_RAW) PetscCall(MatDenseGetArray(ds->omat[DS_MAT_Q],&Q));
693: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)ds),&rank));
694: if (!rank) {
695: if (ds->compact) PetscCallMPI(MPI_Pack(T,ld3,MPIU_REAL,ds->work,size,&off,PetscObjectComm((PetscObject)ds)));
696: else PetscCallMPI(MPI_Pack(A+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds)));
697: if (ds->state>DS_STATE_RAW) PetscCallMPI(MPI_Pack(Q+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds)));
698: if (eigr) PetscCallMPI(MPI_Pack(eigr+l,n,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds)));
699: }
700: PetscCallMPI(MPI_Bcast(ds->work,size,MPI_BYTE,0,PetscObjectComm((PetscObject)ds)));
701: if (rank) {
702: if (ds->compact) PetscCallMPI(MPI_Unpack(ds->work,size,&off,T,ld3,MPIU_REAL,PetscObjectComm((PetscObject)ds)));
703: else PetscCallMPI(MPI_Unpack(ds->work,size,&off,A+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds)));
704: if (ds->state>DS_STATE_RAW) PetscCallMPI(MPI_Unpack(ds->work,size,&off,Q+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds)));
705: if (eigr) PetscCallMPI(MPI_Unpack(ds->work,size,&off,eigr+l,n,MPIU_SCALAR,PetscObjectComm((PetscObject)ds)));
706: }
707: if (ds->compact) PetscCall(DSRestoreArrayReal(ds,DS_MAT_T,&T));
708: else PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
709: if (ds->state>DS_STATE_RAW) PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_Q],&Q));
710: PetscFunctionReturn(PETSC_SUCCESS);
711: }
712: #endif
714: static PetscErrorCode DSCond_HEP(DS ds,PetscReal *cond)
715: {
716: PetscScalar *work;
717: PetscReal *rwork;
718: PetscBLASInt *ipiv;
719: PetscBLASInt lwork,info,n,ld;
720: PetscReal hn,hin;
721: PetscScalar *A;
723: PetscFunctionBegin;
724: PetscCall(PetscBLASIntCast(ds->n,&n));
725: PetscCall(PetscBLASIntCast(ds->ld,&ld));
726: lwork = 8*ld;
727: PetscCall(DSAllocateWork_Private(ds,lwork,ld,ld));
728: work = ds->work;
729: rwork = ds->rwork;
730: ipiv = ds->iwork;
731: if (ds->compact) PetscCall(DSAllocateMat_Private(ds,DS_MAT_A));
732: PetscCall(DSSwitchFormat_HEP(ds));
734: /* use workspace matrix W to avoid overwriting A */
735: PetscCall(DSAllocateMat_Private(ds,DS_MAT_W));
736: PetscCall(MatCopy(ds->omat[DS_MAT_A],ds->omat[DS_MAT_W],SAME_NONZERO_PATTERN));
737: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_W],&A));
739: /* norm of A */
740: hn = LAPACKlange_("I",&n,&n,A,&ld,rwork);
742: /* norm of inv(A) */
743: PetscCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,A,&ld,ipiv,&info));
744: SlepcCheckLapackInfo("getrf",info);
745: PetscCallBLAS("LAPACKgetri",LAPACKgetri_(&n,A,&ld,ipiv,work,&lwork,&info));
746: SlepcCheckLapackInfo("getri",info);
747: hin = LAPACKlange_("I",&n,&n,A,&ld,rwork);
748: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_W],&A));
750: *cond = hn*hin;
751: PetscFunctionReturn(PETSC_SUCCESS);
752: }
754: static PetscErrorCode DSTranslateRKS_HEP(DS ds,PetscScalar alpha)
755: {
756: PetscInt i,j,k=ds->k;
757: PetscScalar *Q,*A,*R,*tau,*work;
758: PetscBLASInt ld,n1,n0,lwork,info;
760: PetscFunctionBegin;
761: PetscCall(PetscBLASIntCast(ds->ld,&ld));
762: PetscCall(DSAllocateWork_Private(ds,ld*ld,0,0));
763: tau = ds->work;
764: work = ds->work+ld;
765: PetscCall(PetscBLASIntCast(ld*(ld-1),&lwork));
766: PetscCall(DSAllocateMat_Private(ds,DS_MAT_W));
767: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
768: PetscCall(MatDenseGetArrayWrite(ds->omat[DS_MAT_Q],&Q));
769: PetscCall(MatDenseGetArrayWrite(ds->omat[DS_MAT_W],&R));
771: /* copy I+alpha*A */
772: PetscCall(PetscArrayzero(Q,ld*ld));
773: PetscCall(PetscArrayzero(R,ld*ld));
774: for (i=0;i<k;i++) {
775: Q[i+i*ld] = 1.0 + alpha*A[i+i*ld];
776: Q[k+i*ld] = alpha*A[k+i*ld];
777: }
779: /* compute qr */
780: PetscCall(PetscBLASIntCast(k+1,&n1));
781: PetscCall(PetscBLASIntCast(k,&n0));
782: PetscCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&n1,&n0,Q,&ld,tau,work,&lwork,&info));
783: SlepcCheckLapackInfo("geqrf",info);
785: /* copy R from Q */
786: for (j=0;j<k;j++)
787: for (i=0;i<=j;i++)
788: R[i+j*ld] = Q[i+j*ld];
790: /* compute orthogonal matrix in Q */
791: PetscCallBLAS("LAPACKorgqr",LAPACKorgqr_(&n1,&n1,&n0,Q,&ld,tau,work,&lwork,&info));
792: SlepcCheckLapackInfo("orgqr",info);
794: /* compute the updated matrix of projected problem */
795: for (j=0;j<k;j++)
796: for (i=0;i<k+1;i++)
797: A[j*ld+i] = Q[i*ld+j];
798: alpha = -1.0/alpha;
799: PetscCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&n1,&n0,&alpha,R,&ld,A,&ld));
800: for (i=0;i<k;i++)
801: A[ld*i+i] -= alpha;
803: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
804: PetscCall(MatDenseRestoreArrayWrite(ds->omat[DS_MAT_Q],&Q));
805: PetscCall(MatDenseRestoreArrayWrite(ds->omat[DS_MAT_W],&R));
806: PetscFunctionReturn(PETSC_SUCCESS);
807: }
809: static PetscErrorCode DSHermitian_HEP(DS ds,DSMatType m,PetscBool *flg)
810: {
811: PetscFunctionBegin;
812: if (m==DS_MAT_A && !ds->extrarow) *flg = PETSC_TRUE;
813: else *flg = PETSC_FALSE;
814: PetscFunctionReturn(PETSC_SUCCESS);
815: }
817: static PetscErrorCode DSSetCompact_HEP(DS ds,PetscBool comp)
818: {
819: PetscFunctionBegin;
820: if (!comp) PetscCall(DSAllocateMat_Private(ds,DS_MAT_A));
821: PetscFunctionReturn(PETSC_SUCCESS);
822: }
824: /*MC
825: DSHEP - Dense Hermitian Eigenvalue Problem.
827: Level: beginner
829: Notes:
830: The problem is expressed as A*X = X*Lambda, where A is real symmetric
831: (or complex Hermitian). Lambda is a diagonal matrix whose diagonal
832: elements are the arguments of DSSolve(). After solve, A is overwritten
833: with Lambda.
835: In the intermediate state A is reduced to tridiagonal form. In compact
836: storage format, the symmetric tridiagonal matrix is stored in T.
838: Used DS matrices:
839: + DS_MAT_A - problem matrix (used only if compact=false)
840: . DS_MAT_T - symmetric tridiagonal matrix
841: - DS_MAT_Q - orthogonal/unitary transformation that reduces to tridiagonal form
842: (intermediate step) or matrix of orthogonal eigenvectors, which is equal to X
844: Implemented methods:
845: + 0 - Implicit QR (_steqr)
846: . 1 - Multiple Relatively Robust Representations (_stevr)
847: . 2 - Divide and Conquer (_stedc)
848: - 3 - Block Divide and Conquer (real scalars only)
850: .seealso: DSCreate(), DSSetType(), DSType
851: M*/
852: SLEPC_EXTERN PetscErrorCode DSCreate_HEP(DS ds)
853: {
854: PetscFunctionBegin;
855: ds->ops->allocate = DSAllocate_HEP;
856: ds->ops->view = DSView_HEP;
857: ds->ops->vectors = DSVectors_HEP;
858: ds->ops->solve[0] = DSSolve_HEP_QR;
859: ds->ops->solve[1] = DSSolve_HEP_MRRR;
860: ds->ops->solve[2] = DSSolve_HEP_DC;
861: #if !defined(PETSC_USE_COMPLEX)
862: ds->ops->solve[3] = DSSolve_HEP_BDC;
863: #endif
864: ds->ops->sort = DSSort_HEP;
865: ds->ops->truncate = DSTruncate_HEP;
866: ds->ops->update = DSUpdateExtraRow_HEP;
867: ds->ops->cond = DSCond_HEP;
868: ds->ops->transrks = DSTranslateRKS_HEP;
869: ds->ops->hermitian = DSHermitian_HEP;
870: #if !defined(PETSC_HAVE_MPIUNI)
871: ds->ops->synchronize = DSSynchronize_HEP;
872: #endif
873: ds->ops->setcompact = DSSetCompact_HEP;
874: PetscFunctionReturn(PETSC_SUCCESS);
875: }