Actual source code: dssvd.c

slepc-main 2024-11-15
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */

 11: #include <slepc/private/dsimpl.h>
 12: #include <slepcblaslapack.h>

 14: typedef struct {
 15:   PetscInt m;              /* number of columns */
 16:   PetscInt t;              /* number of rows of V after truncating */
 17: } DS_SVD;

 19: static PetscErrorCode DSAllocate_SVD(DS ds,PetscInt ld)
 20: {
 21:   PetscFunctionBegin;
 22:   if (!ds->compact) PetscCall(DSAllocateMat_Private(ds,DS_MAT_A));
 23:   PetscCall(DSAllocateMat_Private(ds,DS_MAT_U));
 24:   PetscCall(DSAllocateMat_Private(ds,DS_MAT_V));
 25:   PetscCall(DSAllocateMat_Private(ds,DS_MAT_T));
 26:   PetscCall(PetscFree(ds->perm));
 27:   PetscCall(PetscMalloc1(ld,&ds->perm));
 28:   PetscFunctionReturn(PETSC_SUCCESS);
 29: }

 31: /*   0       l           k                 m-1
 32:     -----------------------------------------
 33:     |*       .           .                  |
 34:     |  *     .           .                  |
 35:     |    *   .           .                  |
 36:     |      * .           .                  |
 37:     |        o           o                  |
 38:     |          o         o                  |
 39:     |            o       o                  |
 40:     |              o     o                  |
 41:     |                o   o                  |
 42:     |                  o o                  |
 43:     |                    o x                |
 44:     |                      x x              |
 45:     |                        x x            |
 46:     |                          x x          |
 47:     |                            x x        |
 48:     |                              x x      |
 49:     |                                x x    |
 50:     |                                  x x  |
 51:     |                                    x x|
 52: n-1 |                                      x|
 53:     -----------------------------------------
 54: */

 56: static PetscErrorCode DSSwitchFormat_SVD(DS ds)
 57: {
 58:   DS_SVD         *ctx = (DS_SVD*)ds->data;
 59:   PetscReal      *T;
 60:   PetscScalar    *A;
 61:   PetscInt       i,m=ctx->m,k=ds->k,ld=ds->ld;

 63:   PetscFunctionBegin;
 64:   PetscCheck(m,PetscObjectComm((PetscObject)ds),PETSC_ERR_ORDER,"You should set the number of columns with DSSVDSetDimensions()");
 65:   PetscCheck(ds->compact,PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Must have compact storage");
 66:   /* switch from compact (arrow) to dense storage */
 67:   PetscCall(DSAllocateMat_Private(ds,DS_MAT_A));
 68:   PetscCall(MatDenseGetArrayWrite(ds->omat[DS_MAT_A],&A));
 69:   PetscCall(DSGetArrayReal(ds,DS_MAT_T,&T));
 70:   PetscCall(PetscArrayzero(A,ld*ld));
 71:   for (i=0;i<k;i++) {
 72:     A[i+i*ld] = T[i];
 73:     A[i+k*ld] = T[i+ld];
 74:   }
 75:   A[k+k*ld] = T[k];
 76:   for (i=k+1;i<m;i++) {
 77:     A[i+i*ld]   = T[i];
 78:     A[i-1+i*ld] = T[i-1+ld];
 79:   }
 80:   PetscCall(MatDenseRestoreArrayWrite(ds->omat[DS_MAT_A],&A));
 81:   PetscCall(DSRestoreArrayReal(ds,DS_MAT_T,&T));
 82:   PetscFunctionReturn(PETSC_SUCCESS);
 83: }

 85: static PetscErrorCode DSView_SVD(DS ds,PetscViewer viewer)
 86: {
 87:   DS_SVD            *ctx = (DS_SVD*)ds->data;
 88:   PetscViewerFormat format;
 89:   PetscInt          i,j,r,c,m=ctx->m,rows,cols;
 90:   PetscReal         *T,value;
 91:   const char        *methodname[] = {
 92:                      "Implicit zero-shift QR for bidiagonals (_bdsqr)",
 93:                      "Divide and Conquer (_bdsdc or _gesdd)"
 94:   };
 95:   const int         nmeth=PETSC_STATIC_ARRAY_LENGTH(methodname);

 97:   PetscFunctionBegin;
 98:   PetscCall(PetscViewerGetFormat(viewer,&format));
 99:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
100:     PetscCall(PetscViewerASCIIPrintf(viewer,"number of columns: %" PetscInt_FMT "\n",m));
101:     if (ds->method<nmeth) PetscCall(PetscViewerASCIIPrintf(viewer,"solving the problem with: %s\n",methodname[ds->method]));
102:     PetscFunctionReturn(PETSC_SUCCESS);
103:   }
104:   PetscCheck(m,PetscObjectComm((PetscObject)ds),PETSC_ERR_ORDER,"You should set the number of columns with DSSVDSetDimensions()");
105:   if (ds->compact) {
106:     PetscCall(DSGetArrayReal(ds,DS_MAT_T,&T));
107:     PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_FALSE));
108:     rows = ds->n;
109:     cols = ds->extrarow? m+1: m;
110:     if (format == PETSC_VIEWER_ASCII_MATLAB) {
111:       PetscCall(PetscViewerASCIIPrintf(viewer,"%% Size = %" PetscInt_FMT " %" PetscInt_FMT "\n",rows,cols));
112:       PetscCall(PetscViewerASCIIPrintf(viewer,"zzz = zeros(%" PetscInt_FMT ",3);\n",2*ds->n));
113:       PetscCall(PetscViewerASCIIPrintf(viewer,"zzz = [\n"));
114:       for (i=0;i<PetscMin(ds->n,m);i++) PetscCall(PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT "  %18.16e\n",i+1,i+1,(double)T[i]));
115:       for (i=0;i<cols-1;i++) {
116:         r = PetscMax(i+2,ds->k+1);
117:         c = i+1;
118:         PetscCall(PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT "  %18.16e\n",c,r,(double)T[i+ds->ld]));
119:       }
120:       PetscCall(PetscViewerASCIIPrintf(viewer,"];\n%s = spconvert(zzz);\n",DSMatName[DS_MAT_T]));
121:     } else {
122:       for (i=0;i<rows;i++) {
123:         for (j=0;j<cols;j++) {
124:           if (i==j) value = T[i];
125:           else if (i<ds->k && j==ds->k) value = T[PetscMin(i,j)+ds->ld];
126:           else if (i+1==j && i>=ds->k) value = T[i+ds->ld];
127:           else value = 0.0;
128:           PetscCall(PetscViewerASCIIPrintf(viewer," %18.16e ",(double)value));
129:         }
130:         PetscCall(PetscViewerASCIIPrintf(viewer,"\n"));
131:       }
132:     }
133:     PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_TRUE));
134:     PetscCall(PetscViewerFlush(viewer));
135:     PetscCall(DSRestoreArrayReal(ds,DS_MAT_T,&T));
136:   } else PetscCall(DSViewMat(ds,viewer,DS_MAT_A));
137:   if (ds->state>DS_STATE_INTERMEDIATE) {
138:     PetscCall(DSViewMat(ds,viewer,DS_MAT_U));
139:     PetscCall(DSViewMat(ds,viewer,DS_MAT_V));
140:   }
141:   PetscFunctionReturn(PETSC_SUCCESS);
142: }

144: static PetscErrorCode DSVectors_SVD(DS ds,DSMatType mat,PetscInt *j,PetscReal *rnorm)
145: {
146:   PetscFunctionBegin;
147:   switch (mat) {
148:     case DS_MAT_U:
149:     case DS_MAT_V:
150:       if (rnorm) *rnorm = 0.0;
151:       break;
152:     default:
153:       SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
154:   }
155:   PetscFunctionReturn(PETSC_SUCCESS);
156: }

158: static PetscErrorCode DSSort_SVD(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
159: {
160:   DS_SVD         *ctx = (DS_SVD*)ds->data;
161:   PetscInt       n,l,i,*perm,ld=ds->ld;
162:   PetscScalar    *A;
163:   PetscReal      *d;

165:   PetscFunctionBegin;
166:   if (!ds->sc) PetscFunctionReturn(PETSC_SUCCESS);
167:   PetscCheck(ctx->m,PetscObjectComm((PetscObject)ds),PETSC_ERR_ORDER,"You should set the number of columns with DSSVDSetDimensions()");
168:   l = ds->l;
169:   n = PetscMin(ds->n,ctx->m);
170:   PetscCall(DSGetArrayReal(ds,DS_MAT_T,&d));
171:   perm = ds->perm;
172:   if (!rr) PetscCall(DSSortEigenvaluesReal_Private(ds,d,perm));
173:   else PetscCall(DSSortEigenvalues_Private(ds,rr,ri,perm,PETSC_FALSE));
174:   for (i=l;i<n;i++) wr[i] = d[perm[i]];
175:   PetscCall(DSPermuteBoth_Private(ds,l,n,ds->n,ctx->m,DS_MAT_U,DS_MAT_V,perm));
176:   for (i=l;i<n;i++) d[i] = PetscRealPart(wr[i]);
177:   if (!ds->compact) {
178:     PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
179:     for (i=l;i<n;i++) A[i+i*ld] = wr[i];
180:     PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
181:   }
182:   PetscCall(DSRestoreArrayReal(ds,DS_MAT_T,&d));
183:   PetscFunctionReturn(PETSC_SUCCESS);
184: }

186: static PetscErrorCode DSUpdateExtraRow_SVD(DS ds)
187: {
188:   DS_SVD            *ctx = (DS_SVD*)ds->data;
189:   PetscInt          i;
190:   PetscBLASInt      n=0,m=0,ld,incx=1;
191:   PetscScalar       *A,*x,*y,one=1.0,zero=0.0;
192:   PetscReal         *T,*e,beta;
193:   const PetscScalar *U;

195:   PetscFunctionBegin;
196:   PetscCheck(ctx->m,PetscObjectComm((PetscObject)ds),PETSC_ERR_ORDER,"You should set the number of columns with DSSVDSetDimensions()");
197:   PetscCall(PetscBLASIntCast(ds->n,&n));
198:   PetscCall(PetscBLASIntCast(ctx->m,&m));
199:   PetscCall(PetscBLASIntCast(ds->ld,&ld));
200:   PetscCall(MatDenseGetArrayRead(ds->omat[DS_MAT_U],&U));
201:   if (ds->compact) {
202:     PetscCall(DSGetArrayReal(ds,DS_MAT_T,&T));
203:     e = T+ld;
204:     beta = e[m-1];   /* in compact, we assume all entries are zero except the last one */
205:     for (i=0;i<n;i++) e[i] = PetscRealPart(beta*U[n-1+i*ld]);
206:     ds->k = m;
207:     PetscCall(DSRestoreArrayReal(ds,DS_MAT_T,&T));
208:   } else {
209:     PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
210:     PetscCall(DSAllocateWork_Private(ds,2*ld,0,0));
211:     x = ds->work;
212:     y = ds->work+ld;
213:     for (i=0;i<n;i++) x[i] = PetscConj(A[i+m*ld]);
214:     PetscCallBLAS("BLASgemv",BLASgemv_("C",&n,&n,&one,U,&ld,x,&incx,&zero,y,&incx));
215:     for (i=0;i<n;i++) A[i+m*ld] = PetscConj(y[i]);
216:     ds->k = m;
217:     PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
218:   }
219:   PetscCall(MatDenseRestoreArrayRead(ds->omat[DS_MAT_U],&U));
220:   PetscFunctionReturn(PETSC_SUCCESS);
221: }

223: static PetscErrorCode DSTruncate_SVD(DS ds,PetscInt n,PetscBool trim)
224: {
225:   PetscInt    i,ld=ds->ld,l=ds->l;
226:   PetscScalar *A;
227:   DS_SVD      *ctx = (DS_SVD*)ds->data;

229:   PetscFunctionBegin;
230:   if (!ds->compact && ds->extrarow) PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
231:   if (trim) {
232:     if (!ds->compact && ds->extrarow) {   /* clean extra column */
233:       for (i=l;i<ds->n;i++) A[i+ctx->m*ld] = 0.0;
234:     }
235:     ds->l  = 0;
236:     ds->k  = 0;
237:     ds->n  = n;
238:     ctx->m = n;
239:     ds->t  = ds->n;   /* truncated length equal to the new dimension */
240:     ctx->t = ctx->m;  /* must also keep the previous dimension of V */
241:   } else {
242:     if (!ds->compact && ds->extrarow && ds->k==ds->n) {
243:       /* copy entries of extra column to the new position, then clean last row */
244:       for (i=l;i<n;i++) A[i+n*ld] = A[i+ctx->m*ld];
245:       for (i=l;i<ds->n;i++) A[i+ctx->m*ld] = 0.0;
246:     }
247:     ds->k  = (ds->extrarow)? n: 0;
248:     ds->t  = ds->n;   /* truncated length equal to previous dimension */
249:     ctx->t = ctx->m;  /* must also keep the previous dimension of V */
250:     ds->n  = n;
251:     ctx->m = n;
252:   }
253:   if (!ds->compact && ds->extrarow) PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
254:   PetscFunctionReturn(PETSC_SUCCESS);
255: }

257: /*
258:   DSArrowBidiag reduces a real square arrowhead matrix of the form

260:                 [ d 0 0 0 e ]
261:                 [ 0 d 0 0 e ]
262:             A = [ 0 0 d 0 e ]
263:                 [ 0 0 0 d e ]
264:                 [ 0 0 0 0 d ]

266:   to upper bidiagonal form

268:                 [ d e 0 0 0 ]
269:                 [ 0 d e 0 0 ]
270:    B = Q'*A*P = [ 0 0 d e 0 ],
271:                 [ 0 0 0 d e ]
272:                 [ 0 0 0 0 d ]

274:   where P,Q are orthogonal matrices. Uses plane rotations with a bulge chasing scheme.
275:   On input, P and Q must be initialized to the identity matrix.
276: */
277: static PetscErrorCode DSArrowBidiag(PetscBLASInt n,PetscReal *d,PetscReal *e,PetscScalar *Q,PetscBLASInt ldq,PetscScalar *P,PetscBLASInt ldp)
278: {
279:   PetscBLASInt i,j,j2,one=1;
280:   PetscReal    c,s,ct,st,off,temp0,temp1,temp2;

282:   PetscFunctionBegin;
283:   if (n<=2) PetscFunctionReturn(PETSC_SUCCESS);

285:   for (j=0;j<n-2;j++) {

287:     /* Eliminate entry e(j) by a rotation in the planes (j,j+1) */
288:     temp0 = e[j+1];
289:     PetscCallBLAS("LAPACKlartg",LAPACKREALlartg_(&temp0,&e[j],&c,&s,&e[j+1]));
290:     s = -s;

292:     /* Apply rotation to Q */
293:     j2 = j+2;
294:     PetscCallBLAS("BLASrot",BLASMIXEDrot_(&j2,Q+j*ldq,&one,Q+(j+1)*ldq,&one,&c,&s));

296:     /* Apply rotation to diagonal elements, eliminate newly introduced entry A(j+1,j) */
297:     temp0 = d[j+1];
298:     temp1 = c*temp0;
299:     temp2 = -s*d[j];
300:     PetscCallBLAS("LAPACKlartg",LAPACKREALlartg_(&temp1,&temp2,&ct,&st,&d[j+1]));
301:     st = -st;
302:     e[j] = -c*st*d[j] + s*ct*temp0;
303:     d[j] = c*ct*d[j] + s*st*temp0;

305:     /* Apply rotation to P */
306:     PetscCallBLAS("BLASrot",BLASMIXEDrot_(&j2,P+j*ldp,&one,P+(j+1)*ldp,&one,&ct,&st));

308:     /* Chase newly introduced off-diagonal entry to the top left corner */
309:     for (i=j-1;i>=0;i--) {

311:       /* Upper bulge */
312:       off   = -st*e[i];
313:       e[i]  = ct*e[i];
314:       temp0 = e[i+1];
315:       PetscCallBLAS("LAPACKlartg",LAPACKREALlartg_(&temp0,&off,&c,&s,&e[i+1]));
316:       s = -s;
317:       PetscCallBLAS("BLASrot",BLASMIXEDrot_(&j2,Q+i*ldq,&one,Q+(i+1)*ldq,&one,&c,&s));

319:       /* Lower bulge */
320:       temp0 = d[i+1];
321:       temp1 = -s*e[i] + c*temp0;
322:       temp2 = c*e[i] + s*temp0;
323:       off   = -s*d[i];
324:       PetscCallBLAS("LAPACKlartg",LAPACKREALlartg_(&temp1,&off,&ct,&st,&d[i+1]));
325:       st = -st;
326:       e[i] = -c*st*d[i] + ct*temp2;
327:       d[i] = c*ct*d[i] + st*temp2;
328:       PetscCallBLAS("BLASrot",BLASMIXEDrot_(&j2,P+i*ldp,&one,P+(i+1)*ldp,&one,&ct,&st));
329:     }
330:   }
331:   PetscFunctionReturn(PETSC_SUCCESS);
332: }

334: /*
335:    Reduce to bidiagonal form by means of DSArrowBidiag.
336: */
337: static PetscErrorCode DSIntermediate_SVD(DS ds)
338: {
339:   DS_SVD        *ctx = (DS_SVD*)ds->data;
340:   PetscInt      i,j;
341:   PetscBLASInt  n1 = 0,n2,m2,lwork,info,l = 0,n = 0,m = 0,nm,ld,off;
342:   PetscScalar   *A,*U,*V,*W,*work,*tauq,*taup;
343:   PetscReal     *d,*e;

345:   PetscFunctionBegin;
346:   PetscCall(PetscBLASIntCast(ds->n,&n));
347:   PetscCall(PetscBLASIntCast(ctx->m,&m));
348:   PetscCall(PetscBLASIntCast(ds->l,&l));
349:   PetscCall(PetscBLASIntCast(ds->ld,&ld));
350:   PetscCall(PetscBLASIntCast(PetscMax(0,ds->k-l+1),&n1)); /* size of leading block, excl. locked */
351:   n2 = n-l;     /* n2 = n1 + size of trailing block */
352:   m2 = m-l;
353:   off = l+l*ld;
354:   nm = PetscMin(n,m);
355:   PetscCall(DSGetArrayReal(ds,DS_MAT_T,&d));
356:   e = d+ld;
357:   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_U],&U));
358:   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_V],&V));
359:   PetscCall(PetscArrayzero(U,ld*ld));
360:   for (i=0;i<n;i++) U[i+i*ld] = 1.0;
361:   PetscCall(PetscArrayzero(V,ld*ld));
362:   for (i=0;i<m;i++) V[i+i*ld] = 1.0;

364:   if (ds->compact) {

366:     if (ds->state<DS_STATE_INTERMEDIATE) PetscCall(DSArrowBidiag(n1,d+l,e+l,U+off,ld,V+off,ld));

368:   } else {

370:     PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
371:     for (i=0;i<l;i++) { d[i] = PetscRealPart(A[i+i*ld]); e[i] = 0.0; }

373:     if (ds->state<DS_STATE_INTERMEDIATE) {
374:       lwork = (m+n)*16;
375:       PetscCall(DSAllocateWork_Private(ds,2*nm+ld*ld+lwork,0,0));
376:       tauq = ds->work;
377:       taup = ds->work+nm;
378:       W    = ds->work+2*nm;
379:       work = ds->work+2*nm+ld*ld;
380:       for (j=0;j<m;j++) PetscCall(PetscArraycpy(W+j*ld,A+j*ld,n));
381:       PetscCallBLAS("LAPACKgebrd",LAPACKgebrd_(&n2,&m2,W+off,&ld,d+l,e+l,tauq,taup,work,&lwork,&info));
382:       SlepcCheckLapackInfo("gebrd",info);
383:       PetscCallBLAS("LAPACKormbr",LAPACKormbr_("Q","L","N",&n2,&n2,&m2,W+off,&ld,tauq,U+off,&ld,work,&lwork,&info));
384:       SlepcCheckLapackInfo("ormbr",info);
385:       PetscCallBLAS("LAPACKormbr",LAPACKormbr_("P","R","N",&m2,&m2,&n2,W+off,&ld,taup,V+off,&ld,work,&lwork,&info));
386:       SlepcCheckLapackInfo("ormbr",info);
387:     } else {
388:       /* copy bidiagonal to d,e */
389:       for (i=l;i<nm;i++)   d[i] = PetscRealPart(A[i+i*ld]);
390:       for (i=l;i<nm-1;i++) e[i] = PetscRealPart(A[i+(i+1)*ld]);
391:     }
392:     PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
393:   }
394:   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_U],&U));
395:   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_V],&V));
396:   PetscCall(DSRestoreArrayReal(ds,DS_MAT_T,&d));
397:   PetscFunctionReturn(PETSC_SUCCESS);
398: }

400: static PetscErrorCode DSSolve_SVD_QR(DS ds,PetscScalar *wr,PetscScalar *wi)
401: {
402:   DS_SVD         *ctx = (DS_SVD*)ds->data;
403:   PetscInt       i,j;
404:   PetscBLASInt   n1,m1,info,l = 0,n = 0,m = 0,nm,ld,off,zero=0;
405:   PetscScalar    *A,*U,*V,*Vt;
406:   PetscReal      *d,*e;

408:   PetscFunctionBegin;
409:   PetscCheck(ctx->m,PetscObjectComm((PetscObject)ds),PETSC_ERR_ORDER,"You should set the number of columns with DSSVDSetDimensions()");
410:   PetscCall(PetscBLASIntCast(ds->n,&n));
411:   PetscCall(PetscBLASIntCast(ctx->m,&m));
412:   PetscCall(PetscBLASIntCast(ds->l,&l));
413:   PetscCall(PetscBLASIntCast(ds->ld,&ld));
414:   n1 = n-l;     /* n1 = size of leading block, excl. locked + size of trailing block */
415:   m1 = m-l;
416:   nm = PetscMin(n1,m1);
417:   off = l+l*ld;
418:   PetscCall(DSGetArrayReal(ds,DS_MAT_T,&d));
419:   e = d+ld;

421:   /* Reduce to bidiagonal form */
422:   PetscCall(DSIntermediate_SVD(ds));

424:   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_U],&U));
425:   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_V],&V));

427:   /* solve bidiagonal SVD problem */
428:   for (i=0;i<l;i++) wr[i] = d[i];
429:   PetscCall(DSAllocateWork_Private(ds,ld*ld,4*n1,0));
430:   Vt = ds->work;
431:   for (i=l;i<m;i++) {
432:     for (j=l;j<m;j++) {
433:       Vt[i+j*ld] = PetscConj(V[j+i*ld]);  /* Lapack expects transposed VT */
434:     }
435:   }
436:   PetscCallBLAS("LAPACKbdsqr",LAPACKbdsqr_(n>=m?"U":"L",&nm,&m1,&n1,&zero,d+l,e+l,Vt+off,&ld,U+off,&ld,NULL,&ld,ds->rwork,&info));
437:   SlepcCheckLapackInfo("bdsqr",info);
438:   for (i=l;i<m;i++) {
439:     for (j=l;j<m;j++) {
440:       V[i+j*ld] = PetscConj(Vt[j+i*ld]);  /* transpose VT returned by Lapack */
441:     }
442:   }
443:   for (i=l;i<PetscMin(ds->n,ctx->m);i++) wr[i] = d[i];

445:   /* create diagonal matrix as a result */
446:   if (ds->compact) PetscCall(PetscArrayzero(e,n-1));
447:   else {
448:     PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
449:     for (i=l;i<m;i++) PetscCall(PetscArrayzero(A+l+i*ld,n-l));
450:     for (i=l;i<PetscMin(ds->n,ctx->m);i++) A[i+i*ld] = d[i];
451:     PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
452:   }
453:   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_U],&U));
454:   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_V],&V));
455:   PetscCall(DSRestoreArrayReal(ds,DS_MAT_T,&d));
456:   PetscFunctionReturn(PETSC_SUCCESS);
457: }

459: static PetscErrorCode DSSolve_SVD_DC(DS ds,PetscScalar *wr,PetscScalar *wi)
460: {
461:   DS_SVD         *ctx = (DS_SVD*)ds->data;
462:   PetscInt       i,j;
463:   PetscBLASInt   n1,m1,info,l = 0,n = 0,m = 0,nm,ld,off,lwork;
464:   PetscScalar    *A,*U,*V,*W,qwork;
465:   PetscReal      *d,*e,*Ur,*Vr;

467:   PetscFunctionBegin;
468:   PetscCheck(ctx->m,PetscObjectComm((PetscObject)ds),PETSC_ERR_ORDER,"You should set the number of columns with DSSVDSetDimensions()");
469:   PetscCall(PetscBLASIntCast(ds->n,&n));
470:   PetscCall(PetscBLASIntCast(ctx->m,&m));
471:   PetscCall(PetscBLASIntCast(ds->l,&l));
472:   PetscCall(PetscBLASIntCast(ds->ld,&ld));
473:   n1 = n-l;     /* n1 = size of leading block, excl. locked + size of trailing block */
474:   m1 = m-l;
475:   off = l+l*ld;
476:   if (ds->compact) PetscCall(DSAllocateMat_Private(ds,DS_MAT_A));
477:   PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
478:   PetscCall(MatDenseGetArrayWrite(ds->omat[DS_MAT_U],&U));
479:   PetscCall(MatDenseGetArrayWrite(ds->omat[DS_MAT_V],&V));
480:   PetscCall(DSGetArrayReal(ds,DS_MAT_T,&d));
481:   e = d+ld;
482:   PetscCall(PetscArrayzero(U,ld*ld));
483:   for (i=0;i<l;i++) U[i+i*ld] = 1.0;
484:   PetscCall(PetscArrayzero(V,ld*ld));
485:   for (i=0;i<l;i++) V[i+i*ld] = 1.0;

487:   if (ds->state>DS_STATE_RAW) {
488:     /* solve bidiagonal SVD problem */
489:     for (i=0;i<l;i++) wr[i] = d[i];
490: #if defined(PETSC_USE_COMPLEX)
491:     PetscCall(DSAllocateWork_Private(ds,0,3*n1*n1+4*n1+2*ld*ld,8*n1));
492:     Ur = ds->rwork+3*n1*n1+4*n1;
493:     Vr = ds->rwork+3*n1*n1+4*n1+ld*ld;
494: #else
495:     PetscCall(DSAllocateWork_Private(ds,0,3*n1*n1+4*n1+ld*ld,8*n1));
496:     Ur = U;
497:     Vr = ds->rwork+3*n1*n1+4*n1;
498: #endif
499:     PetscCallBLAS("LAPACKbdsdc",LAPACKbdsdc_("U","I",&n1,d+l,e+l,Ur+off,&ld,Vr+off,&ld,NULL,NULL,ds->rwork,ds->iwork,&info));
500:     SlepcCheckLapackInfo("bdsdc",info);
501:     for (i=l;i<n;i++) {
502:       for (j=l;j<n;j++) {
503: #if defined(PETSC_USE_COMPLEX)
504:         U[i+j*ld] = Ur[i+j*ld];
505: #endif
506:         V[i+j*ld] = PetscConj(Vr[j+i*ld]);  /* transpose VT returned by Lapack */
507:       }
508:     }
509:   } else {
510:     /* solve general rectangular SVD problem */
511:     PetscCall(DSAllocateMat_Private(ds,DS_MAT_W));
512:     PetscCall(MatDenseGetArrayWrite(ds->omat[DS_MAT_W],&W));
513:     if (ds->compact) PetscCall(DSSwitchFormat_SVD(ds));
514:     for (i=0;i<l;i++) wr[i] = d[i];
515:     nm = PetscMin(n,m);
516:     PetscCall(DSAllocateWork_Private(ds,0,0,8*nm));
517:     lwork = -1;
518: #if defined(PETSC_USE_COMPLEX)
519:     PetscCall(DSAllocateWork_Private(ds,0,5*nm*nm+7*nm,0));
520:     PetscCallBLAS("LAPACKgesdd",LAPACKgesdd_("A",&n1,&m1,A+off,&ld,d+l,U+off,&ld,W+off,&ld,&qwork,&lwork,ds->rwork,ds->iwork,&info));
521: #else
522:     PetscCallBLAS("LAPACKgesdd",LAPACKgesdd_("A",&n1,&m1,A+off,&ld,d+l,U+off,&ld,W+off,&ld,&qwork,&lwork,ds->iwork,&info));
523: #endif
524:     SlepcCheckLapackInfo("gesdd",info);
525:     PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(qwork),&lwork));
526:     PetscCall(DSAllocateWork_Private(ds,lwork,0,0));
527: #if defined(PETSC_USE_COMPLEX)
528:     PetscCallBLAS("LAPACKgesdd",LAPACKgesdd_("A",&n1,&m1,A+off,&ld,d+l,U+off,&ld,W+off,&ld,ds->work,&lwork,ds->rwork,ds->iwork,&info));
529: #else
530:     PetscCallBLAS("LAPACKgesdd",LAPACKgesdd_("A",&n1,&m1,A+off,&ld,d+l,U+off,&ld,W+off,&ld,ds->work,&lwork,ds->iwork,&info));
531: #endif
532:     SlepcCheckLapackInfo("gesdd",info);
533:     for (i=l;i<m;i++) {
534:       for (j=l;j<m;j++) V[i+j*ld] = PetscConj(W[j+i*ld]);  /* transpose VT returned by Lapack */
535:     }
536:     PetscCall(MatDenseRestoreArrayWrite(ds->omat[DS_MAT_W],&W));
537:   }
538:   for (i=l;i<PetscMin(ds->n,ctx->m);i++) wr[i] = d[i];

540:   /* create diagonal matrix as a result */
541:   if (ds->compact) PetscCall(PetscArrayzero(e,n-1));
542:   else {
543:     for (i=l;i<m;i++) PetscCall(PetscArrayzero(A+l+i*ld,n-l));
544:     for (i=l;i<n;i++) A[i+i*ld] = d[i];
545:   }
546:   PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
547:   PetscCall(MatDenseRestoreArrayWrite(ds->omat[DS_MAT_U],&U));
548:   PetscCall(MatDenseRestoreArrayWrite(ds->omat[DS_MAT_V],&V));
549:   PetscCall(DSRestoreArrayReal(ds,DS_MAT_T,&d));
550:   PetscFunctionReturn(PETSC_SUCCESS);
551: }

553: #if !defined(PETSC_HAVE_MPIUNI)
554: static PetscErrorCode DSSynchronize_SVD(DS ds,PetscScalar eigr[],PetscScalar eigi[])
555: {
556:   PetscInt       ld=ds->ld,l=ds->l,k=0,kr=0;
557:   PetscMPIInt    n,rank,off=0,size,ldn,ld3;
558:   PetscScalar    *A,*U,*V;
559:   PetscReal      *T;

561:   PetscFunctionBegin;
562:   if (ds->compact) kr = 3*ld;
563:   else k = (ds->n-l)*ld;
564:   if (ds->state>DS_STATE_RAW) k += 2*(ds->n-l)*ld;
565:   if (eigr) k += ds->n-l;
566:   PetscCall(DSAllocateWork_Private(ds,k+kr,0,0));
567:   PetscCall(PetscMPIIntCast(k*sizeof(PetscScalar)+kr*sizeof(PetscReal),&size));
568:   PetscCall(PetscMPIIntCast(ds->n-l,&n));
569:   PetscCall(PetscMPIIntCast(ld*(ds->n-l),&ldn));
570:   PetscCall(PetscMPIIntCast(3*ld,&ld3));
571:   if (ds->compact) PetscCall(DSGetArrayReal(ds,DS_MAT_T,&T));
572:   else PetscCall(MatDenseGetArray(ds->omat[DS_MAT_A],&A));
573:   if (ds->state>DS_STATE_RAW) {
574:     PetscCall(MatDenseGetArray(ds->omat[DS_MAT_U],&U));
575:     PetscCall(MatDenseGetArray(ds->omat[DS_MAT_V],&V));
576:   }
577:   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)ds),&rank));
578:   if (!rank) {
579:     if (ds->compact) PetscCallMPI(MPI_Pack(T,ld3,MPIU_REAL,ds->work,size,&off,PetscObjectComm((PetscObject)ds)));
580:     else PetscCallMPI(MPI_Pack(A+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds)));
581:     if (ds->state>DS_STATE_RAW) {
582:       PetscCallMPI(MPI_Pack(U+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds)));
583:       PetscCallMPI(MPI_Pack(V+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds)));
584:     }
585:     if (eigr) PetscCallMPI(MPI_Pack(eigr+l,n,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds)));
586:   }
587:   PetscCallMPI(MPI_Bcast(ds->work,size,MPI_BYTE,0,PetscObjectComm((PetscObject)ds)));
588:   if (rank) {
589:     if (ds->compact) PetscCallMPI(MPI_Unpack(ds->work,size,&off,T,ld3,MPIU_REAL,PetscObjectComm((PetscObject)ds)));
590:     else PetscCallMPI(MPI_Unpack(ds->work,size,&off,A+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds)));
591:     if (ds->state>DS_STATE_RAW) {
592:       PetscCallMPI(MPI_Unpack(ds->work,size,&off,U+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds)));
593:       PetscCallMPI(MPI_Unpack(ds->work,size,&off,V+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds)));
594:     }
595:     if (eigr) PetscCallMPI(MPI_Unpack(ds->work,size,&off,eigr+l,n,MPIU_SCALAR,PetscObjectComm((PetscObject)ds)));
596:   }
597:   if (ds->compact) PetscCall(DSRestoreArrayReal(ds,DS_MAT_T,&T));
598:   else PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_A],&A));
599:   if (ds->state>DS_STATE_RAW) {
600:     PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_U],&U));
601:     PetscCall(MatDenseRestoreArray(ds->omat[DS_MAT_V],&V));
602:   }
603:   PetscFunctionReturn(PETSC_SUCCESS);
604: }
605: #endif

607: static PetscErrorCode DSMatGetSize_SVD(DS ds,DSMatType t,PetscInt *rows,PetscInt *cols)
608: {
609:   DS_SVD *ctx = (DS_SVD*)ds->data;

611:   PetscFunctionBegin;
612:   PetscCheck(ctx->m,PetscObjectComm((PetscObject)ds),PETSC_ERR_ORDER,"You should set the number of columns with DSSVDSetDimensions()");
613:   switch (t) {
614:     case DS_MAT_A:
615:       *rows = ds->n;
616:       *cols = ds->extrarow? ctx->m+1: ctx->m;
617:       break;
618:     case DS_MAT_T:
619:       *rows = ds->n;
620:       *cols = PetscDefined(USE_COMPLEX)? 2: 3;
621:       break;
622:     case DS_MAT_U:
623:       *rows = ds->state==DS_STATE_TRUNCATED? ds->t: ds->n;
624:       *cols = ds->n;
625:       break;
626:     case DS_MAT_V:
627:       *rows = ds->state==DS_STATE_TRUNCATED? ctx->t: ctx->m;
628:       *cols = ctx->m;
629:       break;
630:     default:
631:       SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid t parameter");
632:   }
633:   PetscFunctionReturn(PETSC_SUCCESS);
634: }

636: static PetscErrorCode DSSVDSetDimensions_SVD(DS ds,PetscInt m)
637: {
638:   DS_SVD *ctx = (DS_SVD*)ds->data;

640:   PetscFunctionBegin;
641:   DSCheckAlloc(ds,1);
642:   if (m==PETSC_DECIDE || m==PETSC_DEFAULT) {
643:     ctx->m = ds->ld;
644:   } else {
645:     PetscCheck(m>0 && m<=ds->ld,PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Illegal value of m. Must be between 1 and ld");
646:     ctx->m = m;
647:   }
648:   PetscFunctionReturn(PETSC_SUCCESS);
649: }

651: /*@
652:    DSSVDSetDimensions - Sets the number of columns for a DSSVD.

654:    Logically Collective

656:    Input Parameters:
657: +  ds - the direct solver context
658: -  m  - the number of columns

660:    Notes:
661:    This call is complementary to DSSetDimensions(), to provide a dimension
662:    that is specific to this DS type.

664:    Level: intermediate

666: .seealso: DSSVDGetDimensions(), DSSetDimensions()
667: @*/
668: PetscErrorCode DSSVDSetDimensions(DS ds,PetscInt m)
669: {
670:   PetscFunctionBegin;
673:   PetscTryMethod(ds,"DSSVDSetDimensions_C",(DS,PetscInt),(ds,m));
674:   PetscFunctionReturn(PETSC_SUCCESS);
675: }

677: static PetscErrorCode DSSVDGetDimensions_SVD(DS ds,PetscInt *m)
678: {
679:   DS_SVD *ctx = (DS_SVD*)ds->data;

681:   PetscFunctionBegin;
682:   *m = ctx->m;
683:   PetscFunctionReturn(PETSC_SUCCESS);
684: }

686: /*@
687:    DSSVDGetDimensions - Returns the number of columns for a DSSVD.

689:    Not Collective

691:    Input Parameter:
692: .  ds - the direct solver context

694:    Output Parameters:
695: .  m - the number of columns

697:    Level: intermediate

699: .seealso: DSSVDSetDimensions()
700: @*/
701: PetscErrorCode DSSVDGetDimensions(DS ds,PetscInt *m)
702: {
703:   PetscFunctionBegin;
705:   PetscAssertPointer(m,2);
706:   PetscUseMethod(ds,"DSSVDGetDimensions_C",(DS,PetscInt*),(ds,m));
707:   PetscFunctionReturn(PETSC_SUCCESS);
708: }

710: static PetscErrorCode DSDestroy_SVD(DS ds)
711: {
712:   PetscFunctionBegin;
713:   PetscCall(PetscFree(ds->data));
714:   PetscCall(PetscObjectComposeFunction((PetscObject)ds,"DSSVDSetDimensions_C",NULL));
715:   PetscCall(PetscObjectComposeFunction((PetscObject)ds,"DSSVDGetDimensions_C",NULL));
716:   PetscFunctionReturn(PETSC_SUCCESS);
717: }

719: static PetscErrorCode DSSetCompact_SVD(DS ds,PetscBool comp)
720: {
721:   PetscFunctionBegin;
722:   if (!comp) PetscCall(DSAllocateMat_Private(ds,DS_MAT_A));
723:   PetscFunctionReturn(PETSC_SUCCESS);
724: }

726: /*MC
727:    DSSVD - Dense Singular Value Decomposition.

729:    Level: beginner

731:    Notes:
732:    The problem is expressed as A = U*Sigma*V', where A is rectangular in
733:    general, with n rows and m columns. Sigma is a diagonal matrix whose diagonal
734:    elements are the arguments of DSSolve(). After solve, A is overwritten
735:    with Sigma.

737:    The orthogonal (or unitary) matrices of left and right singular vectors, U
738:    and V, have size n and m, respectively. The number of columns m must
739:    be specified via DSSVDSetDimensions().

741:    If the DS object is in the intermediate state, A is assumed to be in upper
742:    bidiagonal form (possibly with an arrow) and is stored in compact format
743:    on matrix T. Otherwise, no particular structure is assumed. The compact
744:    storage is implemented for the square case only, m=n. The extra row should
745:    be interpreted in this case as an extra column.

747:    Used DS matrices:
748: +  DS_MAT_A - problem matrix (used only if compact=false)
749: .  DS_MAT_T - upper bidiagonal matrix
750: .  DS_MAT_U - left singular vectors
751: -  DS_MAT_V - right singular vectors

753:    Implemented methods:
754: +  0 - Implicit zero-shift QR for bidiagonals (_bdsqr)
755: -  1 - Divide and Conquer (_bdsdc or _gesdd)

757: .seealso: DSCreate(), DSSetType(), DSType, DSSVDSetDimensions()
758: M*/
759: SLEPC_EXTERN PetscErrorCode DSCreate_SVD(DS ds)
760: {
761:   DS_SVD         *ctx;

763:   PetscFunctionBegin;
764:   PetscCall(PetscNew(&ctx));
765:   ds->data = (void*)ctx;

767:   ds->ops->allocate      = DSAllocate_SVD;
768:   ds->ops->view          = DSView_SVD;
769:   ds->ops->vectors       = DSVectors_SVD;
770:   ds->ops->solve[0]      = DSSolve_SVD_QR;
771:   ds->ops->solve[1]      = DSSolve_SVD_DC;
772:   ds->ops->sort          = DSSort_SVD;
773:   ds->ops->truncate      = DSTruncate_SVD;
774:   ds->ops->update        = DSUpdateExtraRow_SVD;
775:   ds->ops->destroy       = DSDestroy_SVD;
776:   ds->ops->matgetsize    = DSMatGetSize_SVD;
777: #if !defined(PETSC_HAVE_MPIUNI)
778:   ds->ops->synchronize   = DSSynchronize_SVD;
779: #endif
780:   ds->ops->setcompact    = DSSetCompact_SVD;
781:   PetscCall(PetscObjectComposeFunction((PetscObject)ds,"DSSVDSetDimensions_C",DSSVDSetDimensions_SVD));
782:   PetscCall(PetscObjectComposeFunction((PetscObject)ds,"DSSVDGetDimensions_C",DSSVDGetDimensions_SVD));
783:   PetscFunctionReturn(PETSC_SUCCESS);
784: }