Actual source code: dssvd.c
slepc-main 2024-12-17
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
11: #include <slepc/private/dsimpl.h>
12: #include <slepcblaslapack.h>
14: typedef struct {
15: PetscInt m; /* number of columns */
16: PetscInt t; /* number of rows of V after truncating */
17: } DS_SVD;
19: static PetscErrorCode DSAllocate_SVD(DS ds,PetscInt ld)
20: {
21: PetscFunctionBegin;
22: if (!ds->compact) PetscCall(DSAllocateMat_Private(ds,DS_MAT_A));
23: PetscCall(DSAllocateMat_Private(ds,DS_MAT_U));
24: PetscCall(DSAllocateMat_Private(ds,DS_MAT_V));
25: PetscCall(DSAllocateMat_Private(ds,DS_MAT_T));
26: PetscCall(PetscFree(ds->perm));
27: PetscCall(PetscMalloc1(ld,&ds->perm));
28: PetscFunctionReturn(PETSC_SUCCESS);
29: }
31: /* 0 l k m-1
32: -----------------------------------------
33: |* . . |
34: | * . . |
35: | * . . |
36: | * . . |
37: | o o |
38: | o o |
39: | o o |
40: | o o |
41: | o o |
42: | o o |
43: | o x |
44: | x x |
45: | x x |
46: | x x |
47: | x x |
48: | x x |
49: | x x |
50: | x x |
51: | x x|
52: n-1 | x|
53: -----------------------------------------
54: */
56: static PetscErrorCode DSSwitchFormat_SVD(DS ds)
57: {
58: DS_SVD *ctx = (DS_SVD*)ds->data;
59: PetscReal *T;
60: PetscScalar *A;
61: PetscInt i,m=ctx->m,k=ds->k,ld=ds->ld;
63: PetscFunctionBegin;
64: PetscCheck(m,PetscObjectComm((PetscObject)ds),PETSC_ERR_ORDER,"You should set the number of columns with DSSVDSetDimensions()");
65: PetscCheck(ds->compact,PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Must have compact storage");
66: /* switch from compact (arrow) to dense storage */
67: PetscCall(DSAllocateMat_Private(ds,DS_MAT_A));
68: PetscCall(MatDenseGetArrayWrite(ds->omat[DS_MAT_A],&A));
69: PetscCall(DSGetArrayReal(ds,DS_MAT_T,&T));
70: PetscCall(PetscArrayzero(A,ld*ld));
71: for (i=0;i<k;i++) {
72: A[i+i*ld] = T[i];
73: A[i+k*ld] = T[i+ld];
74: }
75: A[k+k*ld] = T[k];
76: for (i=k+1;i<m;i++) {
77: A[i+i*ld] = T[i];
78: A[i-1+i*ld] = T[i-1+ld];
79: }
80: PetscCall(MatDenseRestoreArrayWrite(ds->omat[DS_MAT_A],&A));
81: PetscCall(DSRestoreArrayReal(ds,DS_MAT_T,&T));
82: PetscFunctionReturn(PETSC_SUCCESS);
83: }
85: static PetscErrorCode DSView_SVD(DS ds,PetscViewer viewer)
86: {
87: DS_SVD *ctx = (DS_SVD*)ds->data;
88: PetscViewerFormat format;
89: PetscInt i,j,r,c,m=ctx->m,rows,cols;
90: PetscReal *T,value;
91: const char *methodname[] = {
92: "Implicit zero-shift QR for bidiagonals (_bdsqr)",
93: "Divide and Conquer (_bdsdc or _gesdd)"
94: };
95: const int nmeth=PETSC_STATIC_ARRAY_LENGTH(methodname);
97: PetscFunctionBegin;
98: PetscCall(PetscViewerGetFormat(viewer,&format));
99: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
100: PetscCall(PetscViewerASCIIPrintf(viewer,"number of columns: %" PetscInt_FMT "\n",m));
101: if (ds->method<nmeth) PetscCall(PetscViewerASCIIPrintf(viewer,"solving the problem with: %s\n",methodname[ds->method]));
102: PetscFunctionReturn(PETSC_SUCCESS);
103: }
104: PetscCheck(m,PetscObjectComm((PetscObject)ds),PETSC_ERR_ORDER,"You should set the number of columns with DSSVDSetDimensions()");
105: if (ds->compact) {
106: PetscCall(DSGetArrayReal(ds,DS_MAT_T,&T));
107: PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_FALSE));
108: rows = ds->n;
109: cols = ds->extrarow? m+1: m;
110: if (format == PETSC_VIEWER_ASCII_MATLAB) {
111: PetscCall(PetscViewerASCIIPrintf(viewer,"%% Size = %" PetscInt_FMT " %" PetscInt_FMT "\n",rows,cols));
112: PetscCall(PetscViewerASCIIPrintf(viewer,"zzz = zeros(%" PetscInt_FMT ",3);\n",2*ds->n));
113: PetscCall(PetscViewerASCIIPrintf(viewer,"zzz = [\n"));
114: for (i=0;i<PetscMin(ds->n,m);i++) PetscCall(PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT " %18.16e\n",i+1,i+1,(double)T[i]));
115: for (i=0;i<cols-1;i++) {
116: r = PetscMax(i+2,ds->k+1);
117: c = i+1;
118: PetscCall(PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT " %18.16e\n",c,r,(double)T[i+ds->ld]));
119: }
120: PetscCall(PetscViewerASCIIPrintf(viewer,"];\n%s = spconvert(zzz);\n",DSMatName[DS_MAT_T]));
121: } else {
122: for (i=0;i<rows;i++) {
123: for (j=0;j<cols;j++) {
124: if (i==j) value = T[i];
125: else if (i<ds->k && j==ds->k) value = T[PetscMin(i,j)+ds->ld];
126: else if (i+1==j && i>=ds->k) value = T[i+ds->ld];
127: else value = 0.0;
128: PetscCall(PetscViewerASCIIPrintf(viewer," %18.16e ",(double)value));
129: }
130: PetscCall(PetscViewerASCIIPrintf(viewer,"\n"));
131: }
132: }
133: PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_TRUE));
134: PetscCall(PetscViewerFlush(viewer));
135: PetscCall(DSRestoreArrayReal(ds,DS_MAT_T,&T));
136: } else PetscCall(DSViewMat(ds,viewer,DS_MAT_A));
137: if (ds->state>DS_STATE_INTERMEDIATE) {
138: PetscCall(DSViewMat(ds,viewer,DS_MAT_U));
139: PetscCall(DSViewMat(ds,viewer,DS_MAT_V));
140: }
141: PetscFunctionReturn(PETSC_SUCCESS);
142: }
144: static PetscErrorCode DSVectors_SVD(DS ds,DSMatType mat,PetscInt *j,PetscReal *rnorm)
145: {
146: PetscFunctionBegin;
147: switch (mat) {
148: case DS_MAT_U:
149: case DS_MAT_V:
150: if (rnorm) *rnorm = 0.0;
151: break;
152: default:
153: SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
154: }
155: PetscFunctionReturn(PETSC_SUCCESS);
156: }
158: static PetscErrorCode DSSort_SVD(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
159: {
160: DS_SVD *ctx = (DS_SVD*)ds->data;
161: PetscInt n,l,i,*perm,ld=ds->ld;
162: PetscScalar *A;
163: PetscReal *d;
165: PetscFunctionBegin;
166: if (!ds->sc) PetscFunctionReturn(PETSC_SUCCESS);
167: PetscCheck(ctx->m,PetscObjectComm((PetscObject)ds),PETSC_ERR_ORDER,"You should set the number of columns with DSSVDSetDimensions()");
168: l = ds->l;
169: n = PetscMin(ds->n,ctx->m);
170: PetscCall(DSGetArrayReal(ds,DS_MAT_T,&d));
171: perm = ds->perm;
172: if (!rr) PetscCall(DSSortEigenvaluesReal_Private(ds,d,perm));
173: else PetscCall(DSSortEigenvalues_Private(ds,rr,ri,perm,PETSC_FALSE));
174: for (i=l;i<n;i++) wr[i] = d[perm[i]];
175: PetscCall(DSPermuteBoth_Private(ds,l,n,ds->n,ctx->m,DS_MAT_U,DS_MAT_V,perm));
176: for (i=l;i<n;i++) d[i] = PetscRealPart(wr[i]);
177: if (!ds->compact) {
178: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
179: for (i=l;i<n;i++) A[i+i*ld] = wr[i];
180: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
181: }
182: PetscCall(DSRestoreArrayReal(ds,DS_MAT_T,&d));
183: PetscFunctionReturn(PETSC_SUCCESS);
184: }
186: static PetscErrorCode DSUpdateExtraRow_SVD(DS ds)
187: {
188: DS_SVD *ctx = (DS_SVD*)ds->data;
189: PetscInt i;
190: PetscBLASInt n=0,m=0,ld,incx=1;
191: PetscScalar *A,*x,*y,one=1.0,zero=0.0;
192: PetscReal *T,*e,beta;
193: const PetscScalar *U;
195: PetscFunctionBegin;
196: PetscCheck(ctx->m,PetscObjectComm((PetscObject)ds),PETSC_ERR_ORDER,"You should set the number of columns with DSSVDSetDimensions()");
197: PetscCall(PetscBLASIntCast(ds->n,&n));
198: PetscCall(PetscBLASIntCast(ctx->m,&m));
199: PetscCall(PetscBLASIntCast(ds->ld,&ld));
200: PetscCall(MatDenseGetArrayRead(ds->omat[DS_MAT_U],&U));
201: if (ds->compact) {
202: PetscCall(DSGetArrayReal(ds,DS_MAT_T,&T));
203: e = T+ld;
204: beta = e[m-1]; /* in compact, we assume all entries are zero except the last one */
205: for (i=0;i<n;i++) e[i] = PetscRealPart(beta*U[n-1+i*ld]);
206: ds->k = m;
207: PetscCall(DSRestoreArrayReal(ds,DS_MAT_T,&T));
208: } else {
209: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
210: PetscCall(DSAllocateWork_Private(ds,2*ld,0,0));
211: x = ds->work;
212: y = ds->work+ld;
213: for (i=0;i<n;i++) x[i] = PetscConj(A[i+m*ld]);
214: PetscCallBLAS("BLASgemv",BLASgemv_("C",&n,&n,&one,U,&ld,x,&incx,&zero,y,&incx));
215: for (i=0;i<n;i++) A[i+m*ld] = PetscConj(y[i]);
216: ds->k = m;
217: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
218: }
219: PetscCall(MatDenseRestoreArrayRead(ds->omat[DS_MAT_U],&U));
220: PetscFunctionReturn(PETSC_SUCCESS);
221: }
223: static PetscErrorCode DSTruncate_SVD(DS ds,PetscInt n,PetscBool trim)
224: {
225: PetscInt i,ld=ds->ld,l=ds->l;
226: PetscScalar *A;
227: DS_SVD *ctx = (DS_SVD*)ds->data;
229: PetscFunctionBegin;
230: if (!ds->compact && ds->extrarow) PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
231: if (trim) {
232: if (!ds->compact && ds->extrarow) { /* clean extra column */
233: for (i=l;i<ds->n;i++) A[i+ctx->m*ld] = 0.0;
234: }
235: ds->l = 0;
236: ds->k = 0;
237: ds->n = n;
238: ctx->m = n;
239: ds->t = ds->n; /* truncated length equal to the new dimension */
240: ctx->t = ctx->m; /* must also keep the previous dimension of V */
241: } else {
242: if (!ds->compact && ds->extrarow && ds->k==ds->n) {
243: /* copy entries of extra column to the new position, then clean last row */
244: for (i=l;i<n;i++) A[i+n*ld] = A[i+ctx->m*ld];
245: for (i=l;i<ds->n;i++) A[i+ctx->m*ld] = 0.0;
246: }
247: ds->k = (ds->extrarow)? n: 0;
248: ds->t = ds->n; /* truncated length equal to previous dimension */
249: ctx->t = ctx->m; /* must also keep the previous dimension of V */
250: ds->n = n;
251: ctx->m = n;
252: }
253: if (!ds->compact && ds->extrarow) PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
254: PetscFunctionReturn(PETSC_SUCCESS);
255: }
257: /*
258: DSArrowBidiag reduces a real square arrowhead matrix of the form
260: [ d 0 0 0 e ]
261: [ 0 d 0 0 e ]
262: A = [ 0 0 d 0 e ]
263: [ 0 0 0 d e ]
264: [ 0 0 0 0 d ]
266: to upper bidiagonal form
268: [ d e 0 0 0 ]
269: [ 0 d e 0 0 ]
270: B = Q'*A*P = [ 0 0 d e 0 ],
271: [ 0 0 0 d e ]
272: [ 0 0 0 0 d ]
274: where P,Q are orthogonal matrices. Uses plane rotations with a bulge chasing scheme.
275: On input, P and Q must be initialized to the identity matrix.
276: */
277: static PetscErrorCode DSArrowBidiag(PetscBLASInt n,PetscReal *d,PetscReal *e,PetscScalar *Q,PetscBLASInt ldq,PetscScalar *P,PetscBLASInt ldp)
278: {
279: PetscBLASInt i,j,j2,one=1;
280: PetscReal c,s,ct,st,off,temp0,temp1,temp2;
282: PetscFunctionBegin;
283: if (n<=2) PetscFunctionReturn(PETSC_SUCCESS);
285: for (j=0;j<n-2;j++) {
287: /* Eliminate entry e(j) by a rotation in the planes (j,j+1) */
288: temp0 = e[j+1];
289: PetscCallBLAS("LAPACKlartg",LAPACKREALlartg_(&temp0,&e[j],&c,&s,&e[j+1]));
290: s = -s;
292: /* Apply rotation to Q */
293: j2 = j+2;
294: PetscCallBLAS("BLASrot",BLASMIXEDrot_(&j2,Q+j*ldq,&one,Q+(j+1)*ldq,&one,&c,&s));
296: /* Apply rotation to diagonal elements, eliminate newly introduced entry A(j+1,j) */
297: temp0 = d[j+1];
298: temp1 = c*temp0;
299: temp2 = -s*d[j];
300: PetscCallBLAS("LAPACKlartg",LAPACKREALlartg_(&temp1,&temp2,&ct,&st,&d[j+1]));
301: st = -st;
302: e[j] = -c*st*d[j] + s*ct*temp0;
303: d[j] = c*ct*d[j] + s*st*temp0;
305: /* Apply rotation to P */
306: PetscCallBLAS("BLASrot",BLASMIXEDrot_(&j2,P+j*ldp,&one,P+(j+1)*ldp,&one,&ct,&st));
308: /* Chase newly introduced off-diagonal entry to the top left corner */
309: for (i=j-1;i>=0;i--) {
311: /* Upper bulge */
312: off = -st*e[i];
313: e[i] = ct*e[i];
314: temp0 = e[i+1];
315: PetscCallBLAS("LAPACKlartg",LAPACKREALlartg_(&temp0,&off,&c,&s,&e[i+1]));
316: s = -s;
317: PetscCallBLAS("BLASrot",BLASMIXEDrot_(&j2,Q+i*ldq,&one,Q+(i+1)*ldq,&one,&c,&s));
319: /* Lower bulge */
320: temp0 = d[i+1];
321: temp1 = -s*e[i] + c*temp0;
322: temp2 = c*e[i] + s*temp0;
323: off = -s*d[i];
324: PetscCallBLAS("LAPACKlartg",LAPACKREALlartg_(&temp1,&off,&ct,&st,&d[i+1]));
325: st = -st;
326: e[i] = -c*st*d[i] + ct*temp2;
327: d[i] = c*ct*d[i] + st*temp2;
328: PetscCallBLAS("BLASrot",BLASMIXEDrot_(&j2,P+i*ldp,&one,P+(i+1)*ldp,&one,&ct,&st));
329: }
330: }
331: PetscFunctionReturn(PETSC_SUCCESS);
332: }
334: /*
335: Reduce to bidiagonal form by means of DSArrowBidiag.
336: */
337: static PetscErrorCode DSIntermediate_SVD(DS ds)
338: {
339: DS_SVD *ctx = (DS_SVD*)ds->data;
340: PetscInt i,j;
341: PetscBLASInt n1 = 0,n2,m2,lwork,info,l = 0,n = 0,m = 0,nm,ld,off;
342: PetscScalar *A,*U,*V,*W,*work,*tauq,*taup;
343: PetscReal *d,*e;
345: PetscFunctionBegin;
346: PetscCall(PetscBLASIntCast(ds->n,&n));
347: PetscCall(PetscBLASIntCast(ctx->m,&m));
348: PetscCall(PetscBLASIntCast(ds->l,&l));
349: PetscCall(PetscBLASIntCast(ds->ld,&ld));
350: PetscCall(PetscBLASIntCast(PetscMax(0,ds->k-l+1),&n1)); /* size of leading block, excl. locked */
351: n2 = n-l; /* n2 = n1 + size of trailing block */
352: m2 = m-l;
353: off = l+l*ld;
354: nm = PetscMin(n,m);
355: PetscCall(DSGetArrayReal(ds,DS_MAT_T,&d));
356: e = d+ld;
357: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_U],&U));
358: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_V],&V));
359: PetscCall(PetscArrayzero(U,ld*ld));
360: for (i=0;i<n;i++) U[i+i*ld] = 1.0;
361: PetscCall(PetscArrayzero(V,ld*ld));
362: for (i=0;i<m;i++) V[i+i*ld] = 1.0;
364: if (ds->compact) {
366: if (ds->state<DS_STATE_INTERMEDIATE) PetscCall(DSArrowBidiag(n1,d+l,e+l,U+off,ld,V+off,ld));
368: } else {
370: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
371: for (i=0;i<l;i++) { d[i] = PetscRealPart(A[i+i*ld]); e[i] = 0.0; }
373: if (ds->state<DS_STATE_INTERMEDIATE) {
374: lwork = (m+n)*16;
375: PetscCall(DSAllocateWork_Private(ds,2*nm+ld*ld+lwork,0,0));
376: tauq = ds->work;
377: taup = ds->work+nm;
378: W = ds->work+2*nm;
379: work = ds->work+2*nm+ld*ld;
380: for (j=0;j<m;j++) PetscCall(PetscArraycpy(W+j*ld,A+j*ld,n));
381: PetscCallBLAS("LAPACKgebrd",LAPACKgebrd_(&n2,&m2,W+off,&ld,d+l,e+l,tauq,taup,work,&lwork,&info));
382: SlepcCheckLapackInfo("gebrd",info);
383: PetscCallBLAS("LAPACKormbr",LAPACKormbr_("Q","L","N",&n2,&n2,&m2,W+off,&ld,tauq,U+off,&ld,work,&lwork,&info));
384: SlepcCheckLapackInfo("ormbr",info);
385: PetscCallBLAS("LAPACKormbr",LAPACKormbr_("P","R","N",&m2,&m2,&n2,W+off,&ld,taup,V+off,&ld,work,&lwork,&info));
386: SlepcCheckLapackInfo("ormbr",info);
387: } else {
388: /* copy bidiagonal to d,e */
389: for (i=l;i<nm;i++) d[i] = PetscRealPart(A[i+i*ld]);
390: for (i=l;i<nm-1;i++) e[i] = PetscRealPart(A[i+(i+1)*ld]);
391: }
392: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
393: }
394: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_U],&U));
395: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_V],&V));
396: PetscCall(DSRestoreArrayReal(ds,DS_MAT_T,&d));
397: PetscFunctionReturn(PETSC_SUCCESS);
398: }
400: static PetscErrorCode DSSolve_SVD_QR(DS ds,PetscScalar *wr,PetscScalar *wi)
401: {
402: DS_SVD *ctx = (DS_SVD*)ds->data;
403: PetscInt i,j;
404: PetscBLASInt n1,m1,info,l = 0,n = 0,m = 0,nm,ld,off,zero=0;
405: PetscScalar *A,*U,*V,*Vt;
406: PetscReal *d,*e;
408: PetscFunctionBegin;
409: PetscCheck(ctx->m,PetscObjectComm((PetscObject)ds),PETSC_ERR_ORDER,"You should set the number of columns with DSSVDSetDimensions()");
410: PetscCall(PetscBLASIntCast(ds->n,&n));
411: PetscCall(PetscBLASIntCast(ctx->m,&m));
412: PetscCall(PetscBLASIntCast(ds->l,&l));
413: PetscCall(PetscBLASIntCast(ds->ld,&ld));
414: n1 = n-l; /* n1 = size of leading block, excl. locked + size of trailing block */
415: m1 = m-l;
416: nm = PetscMin(n1,m1);
417: off = l+l*ld;
418: PetscCall(DSGetArrayReal(ds,DS_MAT_T,&d));
419: e = d+ld;
421: /* Reduce to bidiagonal form */
422: PetscCall(DSIntermediate_SVD(ds));
424: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_U],&U));
425: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_V],&V));
427: /* solve bidiagonal SVD problem */
428: for (i=0;i<l;i++) wr[i] = d[i];
429: PetscCall(DSAllocateWork_Private(ds,ld*ld,4*n1,0));
430: Vt = ds->work;
431: for (i=l;i<m;i++) {
432: for (j=l;j<m;j++) {
433: Vt[i+j*ld] = PetscConj(V[j+i*ld]); /* Lapack expects transposed VT */
434: }
435: }
436: PetscCallBLAS("LAPACKbdsqr",LAPACKbdsqr_(n>=m?"U":"L",&nm,&m1,&n1,&zero,d+l,e+l,Vt+off,&ld,U+off,&ld,NULL,&ld,ds->rwork,&info));
437: SlepcCheckLapackInfo("bdsqr",info);
438: for (i=l;i<m;i++) {
439: for (j=l;j<m;j++) {
440: V[i+j*ld] = PetscConj(Vt[j+i*ld]); /* transpose VT returned by Lapack */
441: }
442: }
443: for (i=l;i<PetscMin(ds->n,ctx->m);i++) wr[i] = d[i];
445: /* create diagonal matrix as a result */
446: if (ds->compact) PetscCall(PetscArrayzero(e,n-1));
447: else {
448: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
449: for (i=l;i<m;i++) PetscCall(PetscArrayzero(A+l+i*ld,n-l));
450: for (i=l;i<PetscMin(ds->n,ctx->m);i++) A[i+i*ld] = d[i];
451: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
452: }
453: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_U],&U));
454: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_V],&V));
455: PetscCall(DSRestoreArrayReal(ds,DS_MAT_T,&d));
456: PetscFunctionReturn(PETSC_SUCCESS);
457: }
459: static PetscErrorCode DSSolve_SVD_DC(DS ds,PetscScalar *wr,PetscScalar *wi)
460: {
461: DS_SVD *ctx = (DS_SVD*)ds->data;
462: PetscInt i,j;
463: PetscBLASInt n1,m1,info,l = 0,n = 0,m = 0,nm,ld,off,lwork;
464: PetscScalar *A,*U,*V,*W,qwork;
465: PetscReal *d,*e,*Ur,*Vr;
467: PetscFunctionBegin;
468: PetscCheck(ctx->m,PetscObjectComm((PetscObject)ds),PETSC_ERR_ORDER,"You should set the number of columns with DSSVDSetDimensions()");
469: PetscCall(PetscBLASIntCast(ds->n,&n));
470: PetscCall(PetscBLASIntCast(ctx->m,&m));
471: PetscCall(PetscBLASIntCast(ds->l,&l));
472: PetscCall(PetscBLASIntCast(ds->ld,&ld));
473: n1 = n-l; /* n1 = size of leading block, excl. locked + size of trailing block */
474: m1 = m-l;
475: off = l+l*ld;
476: if (ds->compact) PetscCall(DSAllocateMat_Private(ds,DS_MAT_A));
477: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
478: PetscCall(MatDenseGetArrayWrite(ds->omat[DS_MAT_U],&U));
479: PetscCall(MatDenseGetArrayWrite(ds->omat[DS_MAT_V],&V));
480: PetscCall(DSGetArrayReal(ds,DS_MAT_T,&d));
481: e = d+ld;
482: PetscCall(PetscArrayzero(U,ld*ld));
483: for (i=0;i<l;i++) U[i+i*ld] = 1.0;
484: PetscCall(PetscArrayzero(V,ld*ld));
485: for (i=0;i<l;i++) V[i+i*ld] = 1.0;
487: if (ds->state>DS_STATE_RAW) {
488: /* solve bidiagonal SVD problem */
489: for (i=0;i<l;i++) wr[i] = d[i];
490: #if defined(PETSC_USE_COMPLEX)
491: PetscCall(DSAllocateWork_Private(ds,0,3*n1*n1+4*n1+2*ld*ld,8*n1));
492: Ur = ds->rwork+3*n1*n1+4*n1;
493: Vr = ds->rwork+3*n1*n1+4*n1+ld*ld;
494: #else
495: PetscCall(DSAllocateWork_Private(ds,0,3*n1*n1+4*n1+ld*ld,8*n1));
496: Ur = U;
497: Vr = ds->rwork+3*n1*n1+4*n1;
498: #endif
499: PetscCallBLAS("LAPACKbdsdc",LAPACKbdsdc_("U","I",&n1,d+l,e+l,Ur+off,&ld,Vr+off,&ld,NULL,NULL,ds->rwork,ds->iwork,&info));
500: SlepcCheckLapackInfo("bdsdc",info);
501: for (i=l;i<n;i++) {
502: for (j=l;j<n;j++) {
503: #if defined(PETSC_USE_COMPLEX)
504: U[i+j*ld] = Ur[i+j*ld];
505: #endif
506: V[i+j*ld] = PetscConj(Vr[j+i*ld]); /* transpose VT returned by Lapack */
507: }
508: }
509: } else {
510: /* solve general rectangular SVD problem */
511: PetscCall(DSAllocateMat_Private(ds,DS_MAT_W));
512: PetscCall(MatDenseGetArrayWrite(ds->omat[DS_MAT_W],&W));
513: if (ds->compact) PetscCall(DSSwitchFormat_SVD(ds));
514: for (i=0;i<l;i++) wr[i] = d[i];
515: nm = PetscMin(n,m);
516: PetscCall(DSAllocateWork_Private(ds,0,0,8*nm));
517: lwork = -1;
518: #if defined(PETSC_USE_COMPLEX)
519: PetscCall(DSAllocateWork_Private(ds,0,5*nm*nm+7*nm,0));
520: PetscCallBLAS("LAPACKgesdd",LAPACKgesdd_("A",&n1,&m1,A+off,&ld,d+l,U+off,&ld,W+off,&ld,&qwork,&lwork,ds->rwork,ds->iwork,&info));
521: #else
522: PetscCallBLAS("LAPACKgesdd",LAPACKgesdd_("A",&n1,&m1,A+off,&ld,d+l,U+off,&ld,W+off,&ld,&qwork,&lwork,ds->iwork,&info));
523: #endif
524: SlepcCheckLapackInfo("gesdd",info);
525: PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(qwork),&lwork));
526: PetscCall(DSAllocateWork_Private(ds,lwork,0,0));
527: #if defined(PETSC_USE_COMPLEX)
528: PetscCallBLAS("LAPACKgesdd",LAPACKgesdd_("A",&n1,&m1,A+off,&ld,d+l,U+off,&ld,W+off,&ld,ds->work,&lwork,ds->rwork,ds->iwork,&info));
529: #else
530: PetscCallBLAS("LAPACKgesdd",LAPACKgesdd_("A",&n1,&m1,A+off,&ld,d+l,U+off,&ld,W+off,&ld,ds->work,&lwork,ds->iwork,&info));
531: #endif
532: SlepcCheckLapackInfo("gesdd",info);
533: for (i=l;i<m;i++) {
534: for (j=l;j<m;j++) V[i+j*ld] = PetscConj(W[j+i*ld]); /* transpose VT returned by Lapack */
535: }
536: PetscCall(MatDenseRestoreArrayWrite(ds->omat[DS_MAT_W],&W));
537: }
538: for (i=l;i<PetscMin(ds->n,ctx->m);i++) wr[i] = d[i];
540: /* create diagonal matrix as a result */
541: if (ds->compact) PetscCall(PetscArrayzero(e,n-1));
542: else {
543: for (i=l;i<m;i++) PetscCall(PetscArrayzero(A+l+i*ld,n-l));
544: for (i=l;i<n;i++) A[i+i*ld] = d[i];
545: }
546: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
547: PetscCall(MatDenseRestoreArrayWrite(ds->omat[DS_MAT_U],&U));
548: PetscCall(MatDenseRestoreArrayWrite(ds->omat[DS_MAT_V],&V));
549: PetscCall(DSRestoreArrayReal(ds,DS_MAT_T,&d));
550: PetscFunctionReturn(PETSC_SUCCESS);
551: }
553: #if !defined(PETSC_HAVE_MPIUNI)
554: static PetscErrorCode DSSynchronize_SVD(DS ds,PetscScalar eigr[],PetscScalar eigi[])
555: {
556: PetscInt ld=ds->ld,l=ds->l,k=0,kr=0;
557: PetscMPIInt n,rank,off=0,size,ldn,ld3;
558: PetscScalar *A,*U,*V;
559: PetscReal *T;
561: PetscFunctionBegin;
562: if (ds->compact) kr = 3*ld;
563: else k = (ds->n-l)*ld;
564: if (ds->state>DS_STATE_RAW) k += 2*(ds->n-l)*ld;
565: if (eigr) k += ds->n-l;
566: PetscCall(DSAllocateWork_Private(ds,k+kr,0,0));
567: PetscCall(PetscMPIIntCast(k*sizeof(PetscScalar)+kr*sizeof(PetscReal),&size));
568: PetscCall(PetscMPIIntCast(ds->n-l,&n));
569: PetscCall(PetscMPIIntCast(ld*(ds->n-l),&ldn));
570: PetscCall(PetscMPIIntCast(3*ld,&ld3));
571: if (ds->compact) PetscCall(DSGetArrayReal(ds,DS_MAT_T,&T));
572: else PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
573: if (ds->state>DS_STATE_RAW) {
574: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_U],&U));
575: PetscCall(MatDenseGetArray(ds->omat[DS_MAT_V],&V));
576: }
577: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)ds),&rank));
578: if (!rank) {
579: if (ds->compact) PetscCallMPI(MPI_Pack(T,ld3,MPIU_REAL,ds->work,size,&off,PetscObjectComm((PetscObject)ds)));
580: else PetscCallMPI(MPI_Pack(A+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds)));
581: if (ds->state>DS_STATE_RAW) {
582: PetscCallMPI(MPI_Pack(U+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds)));
583: PetscCallMPI(MPI_Pack(V+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds)));
584: }
585: if (eigr) PetscCallMPI(MPI_Pack(eigr+l,n,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds)));
586: }
587: PetscCallMPI(MPI_Bcast(ds->work,size,MPI_BYTE,0,PetscObjectComm((PetscObject)ds)));
588: if (rank) {
589: if (ds->compact) PetscCallMPI(MPI_Unpack(ds->work,size,&off,T,ld3,MPIU_REAL,PetscObjectComm((PetscObject)ds)));
590: else PetscCallMPI(MPI_Unpack(ds->work,size,&off,A+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds)));
591: if (ds->state>DS_STATE_RAW) {
592: PetscCallMPI(MPI_Unpack(ds->work,size,&off,U+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds)));
593: PetscCallMPI(MPI_Unpack(ds->work,size,&off,V+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds)));
594: }
595: if (eigr) PetscCallMPI(MPI_Unpack(ds->work,size,&off,eigr+l,n,MPIU_SCALAR,PetscObjectComm((PetscObject)ds)));
596: }
597: if (ds->compact) PetscCall(DSRestoreArrayReal(ds,DS_MAT_T,&T));
598: else PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
599: if (ds->state>DS_STATE_RAW) {
600: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_U],&U));
601: PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_V],&V));
602: }
603: PetscFunctionReturn(PETSC_SUCCESS);
604: }
605: #endif
607: static PetscErrorCode DSMatGetSize_SVD(DS ds,DSMatType t,PetscInt *rows,PetscInt *cols)
608: {
609: DS_SVD *ctx = (DS_SVD*)ds->data;
611: PetscFunctionBegin;
612: PetscCheck(ctx->m,PetscObjectComm((PetscObject)ds),PETSC_ERR_ORDER,"You should set the number of columns with DSSVDSetDimensions()");
613: switch (t) {
614: case DS_MAT_A:
615: *rows = ds->n;
616: *cols = ds->extrarow? ctx->m+1: ctx->m;
617: break;
618: case DS_MAT_T:
619: *rows = ds->n;
620: *cols = PetscDefined(USE_COMPLEX)? 2: 3;
621: break;
622: case DS_MAT_U:
623: *rows = ds->state==DS_STATE_TRUNCATED? ds->t: ds->n;
624: *cols = ds->n;
625: break;
626: case DS_MAT_V:
627: *rows = ds->state==DS_STATE_TRUNCATED? ctx->t: ctx->m;
628: *cols = ctx->m;
629: break;
630: default:
631: SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid t parameter");
632: }
633: PetscFunctionReturn(PETSC_SUCCESS);
634: }
636: static PetscErrorCode DSSVDSetDimensions_SVD(DS ds,PetscInt m)
637: {
638: DS_SVD *ctx = (DS_SVD*)ds->data;
640: PetscFunctionBegin;
641: DSCheckAlloc(ds,1);
642: if (m==PETSC_DECIDE || m==PETSC_DEFAULT) {
643: ctx->m = ds->ld;
644: } else {
645: PetscCheck(m>0 && m<=ds->ld,PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Illegal value of m. Must be between 1 and ld");
646: ctx->m = m;
647: }
648: PetscFunctionReturn(PETSC_SUCCESS);
649: }
651: /*@
652: DSSVDSetDimensions - Sets the number of columns for a DSSVD.
654: Logically Collective
656: Input Parameters:
657: + ds - the direct solver context
658: - m - the number of columns
660: Notes:
661: This call is complementary to DSSetDimensions(), to provide a dimension
662: that is specific to this DS type.
664: Level: intermediate
666: .seealso: DSSVDGetDimensions(), DSSetDimensions()
667: @*/
668: PetscErrorCode DSSVDSetDimensions(DS ds,PetscInt m)
669: {
670: PetscFunctionBegin;
673: PetscTryMethod(ds,"DSSVDSetDimensions_C",(DS,PetscInt),(ds,m));
674: PetscFunctionReturn(PETSC_SUCCESS);
675: }
677: static PetscErrorCode DSSVDGetDimensions_SVD(DS ds,PetscInt *m)
678: {
679: DS_SVD *ctx = (DS_SVD*)ds->data;
681: PetscFunctionBegin;
682: *m = ctx->m;
683: PetscFunctionReturn(PETSC_SUCCESS);
684: }
686: /*@
687: DSSVDGetDimensions - Returns the number of columns for a DSSVD.
689: Not Collective
691: Input Parameter:
692: . ds - the direct solver context
694: Output Parameters:
695: . m - the number of columns
697: Level: intermediate
699: .seealso: DSSVDSetDimensions()
700: @*/
701: PetscErrorCode DSSVDGetDimensions(DS ds,PetscInt *m)
702: {
703: PetscFunctionBegin;
705: PetscAssertPointer(m,2);
706: PetscUseMethod(ds,"DSSVDGetDimensions_C",(DS,PetscInt*),(ds,m));
707: PetscFunctionReturn(PETSC_SUCCESS);
708: }
710: static PetscErrorCode DSDestroy_SVD(DS ds)
711: {
712: PetscFunctionBegin;
713: PetscCall(PetscFree(ds->data));
714: PetscCall(PetscObjectComposeFunction((PetscObject)ds,"DSSVDSetDimensions_C",NULL));
715: PetscCall(PetscObjectComposeFunction((PetscObject)ds,"DSSVDGetDimensions_C",NULL));
716: PetscFunctionReturn(PETSC_SUCCESS);
717: }
719: static PetscErrorCode DSSetCompact_SVD(DS ds,PetscBool comp)
720: {
721: PetscFunctionBegin;
722: if (!comp) PetscCall(DSAllocateMat_Private(ds,DS_MAT_A));
723: PetscFunctionReturn(PETSC_SUCCESS);
724: }
726: static PetscErrorCode DSReallocate_SVD(DS ds,PetscInt ld)
727: {
728: PetscInt i,*perm=ds->perm;
730: PetscFunctionBegin;
731: for (i=0;i<DS_NUM_MAT;i++) {
732: if (!ds->compact && i==DS_MAT_A) continue;
733: if (i!=DS_MAT_U && i!=DS_MAT_V && i!=DS_MAT_T) PetscCall(MatDestroy(&ds->omat[i]));
734: }
736: if (!ds->compact) PetscCall(DSReallocateMat_Private(ds,DS_MAT_A,ld));
737: PetscCall(DSReallocateMat_Private(ds,DS_MAT_U,ld));
738: PetscCall(DSReallocateMat_Private(ds,DS_MAT_V,ld));
739: PetscCall(DSReallocateMat_Private(ds,DS_MAT_T,ld));
741: PetscCall(PetscMalloc1(ld,&ds->perm));
742: PetscCall(PetscArraycpy(ds->perm,perm,ds->ld));
743: PetscCall(PetscFree(perm));
744: PetscFunctionReturn(PETSC_SUCCESS);
745: }
747: /*MC
748: DSSVD - Dense Singular Value Decomposition.
750: Level: beginner
752: Notes:
753: The problem is expressed as A = U*Sigma*V', where A is rectangular in
754: general, with n rows and m columns. Sigma is a diagonal matrix whose diagonal
755: elements are the arguments of DSSolve(). After solve, A is overwritten
756: with Sigma.
758: The orthogonal (or unitary) matrices of left and right singular vectors, U
759: and V, have size n and m, respectively. The number of columns m must
760: be specified via DSSVDSetDimensions().
762: If the DS object is in the intermediate state, A is assumed to be in upper
763: bidiagonal form (possibly with an arrow) and is stored in compact format
764: on matrix T. Otherwise, no particular structure is assumed. The compact
765: storage is implemented for the square case only, m=n. The extra row should
766: be interpreted in this case as an extra column.
768: Used DS matrices:
769: + DS_MAT_A - problem matrix (used only if compact=false)
770: . DS_MAT_T - upper bidiagonal matrix
771: . DS_MAT_U - left singular vectors
772: - DS_MAT_V - right singular vectors
774: Implemented methods:
775: + 0 - Implicit zero-shift QR for bidiagonals (_bdsqr)
776: - 1 - Divide and Conquer (_bdsdc or _gesdd)
778: .seealso: DSCreate(), DSSetType(), DSType, DSSVDSetDimensions()
779: M*/
780: SLEPC_EXTERN PetscErrorCode DSCreate_SVD(DS ds)
781: {
782: DS_SVD *ctx;
784: PetscFunctionBegin;
785: PetscCall(PetscNew(&ctx));
786: ds->data = (void*)ctx;
788: ds->ops->allocate = DSAllocate_SVD;
789: ds->ops->view = DSView_SVD;
790: ds->ops->vectors = DSVectors_SVD;
791: ds->ops->solve[0] = DSSolve_SVD_QR;
792: ds->ops->solve[1] = DSSolve_SVD_DC;
793: ds->ops->sort = DSSort_SVD;
794: ds->ops->truncate = DSTruncate_SVD;
795: ds->ops->update = DSUpdateExtraRow_SVD;
796: ds->ops->destroy = DSDestroy_SVD;
797: ds->ops->matgetsize = DSMatGetSize_SVD;
798: #if !defined(PETSC_HAVE_MPIUNI)
799: ds->ops->synchronize = DSSynchronize_SVD;
800: #endif
801: ds->ops->setcompact = DSSetCompact_SVD;
802: ds->ops->reallocate = DSReallocate_SVD;
803: PetscCall(PetscObjectComposeFunction((PetscObject)ds,"DSSVDSetDimensions_C",DSSVDSetDimensions_SVD));
804: PetscCall(PetscObjectComposeFunction((PetscObject)ds,"DSSVDGetDimensions_C",DSSVDGetDimensions_SVD));
805: PetscFunctionReturn(PETSC_SUCCESS);
806: }