Actual source code: dvd_blas.c

  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-2012, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:       
  8:    SLEPc is free software: you can redistribute it and/or modify it under  the
  9:    terms of version 3 of the GNU Lesser General Public License as published by
 10:    the Free Software Foundation.

 12:    SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY 
 13:    WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS 
 14:    FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for 
 15:    more details.

 17:    You  should have received a copy of the GNU Lesser General  Public  License
 18:    along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
 19:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 20: */

 22: #include <slepc-private/vecimplslepc.h>
 23:  #include davidson.h

 25: PetscLogEvent SLEPC_SlepcDenseMatProd = 0;
 26: PetscLogEvent SLEPC_SlepcDenseNorm = 0;
 27: PetscLogEvent SLEPC_SlepcDenseCopy = 0;
 28: PetscLogEvent SLEPC_VecsMult = 0;

 30: void dvd_sum_local(void *in, void *out, PetscMPIInt *cnt,MPI_Datatype *t);
 31: PetscErrorCode VecsMultS_copy_func(PetscScalar *out, PetscInt size_out,
 32:                                    void *ptr);

 36: /*
 37:   Compute C <- a*A*B + b*C, where
 38:     ldC, the leading dimension of C,
 39:     ldA, the leading dimension of A,
 40:     rA, cA, rows and columns of A,
 41:     At, if true use the transpose of A instead,
 42:     ldB, the leading dimension of B,
 43:     rB, cB, rows and columns of B,
 44:     Bt, if true use the transpose of B instead
 45: */
 46: PetscErrorCode SlepcDenseMatProd(PetscScalar *C, PetscInt _ldC, PetscScalar b,
 47:   PetscScalar a,
 48:   const PetscScalar *A, PetscInt _ldA, PetscInt rA, PetscInt cA, PetscBool At,
 49:   const PetscScalar *B, PetscInt _ldB, PetscInt rB, PetscInt cB, PetscBool Bt)
 50: {
 51:   PetscErrorCode  ierr;
 52:   PetscInt        tmp;
 53:   PetscBLASInt    m, n, k, ldA = _ldA, ldB = _ldB, ldC = _ldC;
 54:   const char      *N = "N", *T = "C", *qA = N, *qB = N;


 58:   if ((rA == 0) || (cB == 0)) { return(0); }

 63:   PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);

 65:   /* Transpose if needed */
 66:   if (At) tmp = rA, rA = cA, cA = tmp, qA = T;
 67:   if (Bt) tmp = rB, rB = cB, cB = tmp, qB = T;
 68: 
 69:   /* Check size */
 70:   if (cA != rB) SETERRQ(PETSC_COMM_SELF,1, "Matrix dimensions do not match");
 71: 
 72:   /* Do stub */
 73:   if ((rA == 1) && (cA == 1) && (cB == 1)) {
 74:     if (!At && !Bt) *C = *A * *B;
 75:     else if (At && !Bt) *C = PetscConj(*A) * *B;
 76:     else if (!At && Bt) *C = *A * PetscConj(*B);
 77:     else *C = PetscConj(*A) * PetscConj(*B);
 78:     m = n = k = 1;
 79:   } else {
 80:     m = rA; n = cB; k = cA;
 81:     BLASgemm_(qA, qB, &m, &n, &k, &a, (PetscScalar*)A, &ldA, (PetscScalar*)B,
 82:               &ldB, &b, C, &ldC);
 83:   }

 85:   PetscLogFlops(m*n*2*k);
 86:   PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);

 88:   return(0);
 89: }

 93: /*
 94:   Compute C <- A*B, where
 95:     sC, structure of C,
 96:     ldC, the leading dimension of C,
 97:     sA, structure of A,
 98:     ldA, the leading dimension of A,
 99:     rA, cA, rows and columns of A,
100:     At, if true use the transpose of A instead,
101:     sB, structure of B,
102:     ldB, the leading dimension of B,
103:     rB, cB, rows and columns of B,
104:     Bt, if true use the transpose of B instead
105: */
106: PetscErrorCode SlepcDenseMatProdTriang(
107:   PetscScalar *C, MatType_t sC, PetscInt ldC,
108:   const PetscScalar *A, MatType_t sA, PetscInt ldA, PetscInt rA, PetscInt cA,
109:   PetscBool At,
110:   const PetscScalar *B, MatType_t sB, PetscInt ldB, PetscInt rB, PetscInt cB,
111:   PetscBool Bt)
112: {
113:   PetscErrorCode  ierr;
114:   PetscInt        tmp;
115:   PetscScalar     one=1.0, zero=0.0;
116:   PetscBLASInt    rC, cC, _ldA = ldA, _ldB = ldB, _ldC = ldC;


120:   if ((rA == 0) || (cB == 0)) { return(0); }

125:   /* Transpose if needed */
126:   if (At) tmp = rA, rA = cA, cA = tmp;
127:   if (Bt) tmp = rB, rB = cB, cB = tmp;
128: 
129:   /* Check size */
130:   if (cA != rB) SETERRQ(PETSC_COMM_SELF,1, "Matrix dimensions do not match");
131:   if (sB != 0) SETERRQ(PETSC_COMM_SELF,1, "Matrix type not supported for B");

133:   /* Optimized version: trivial case */
134:   if ((rA == 1) && (cA == 1) && (cB == 1)) {
135:     if (!At && !Bt)     *C = *A * *B;
136:     else if (At && !Bt) *C = PetscConj(*A) * *B;
137:     else if (!At && Bt) *C = *A * PetscConj(*B);
138:     else if (At && Bt)  *C = PetscConj(*A) * PetscConj(*B);
139:     return(0);
140:   }
141: 
142:   /* Optimized versions: sA == 0 && sB == 0 */
143:   if ((sA == 0) && (sB == 0)) {
144:     if (At) tmp = rA, rA = cA, cA = tmp;
145:     if (Bt) tmp = rB, rB = cB, cB = tmp;
146:     SlepcDenseMatProd(C, ldC, 0.0, 1.0, A, ldA, rA, cA, At, B, ldB, rB,
147:                              cB, Bt);
148:     PetscFunctionReturn(ierr);
149:   }

151:   /* Optimized versions: A hermitian && (B not triang) */
152:   if (DVD_IS(sA,DVD_MAT_HERMITIAN) &&
153:       DVD_ISNOT(sB,DVD_MAT_UTRIANG) &&
154:       DVD_ISNOT(sB,DVD_MAT_LTRIANG)    ) {
155:     PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);
156:     rC = rA; cC = cB;
157:     BLASsymm_("L", DVD_ISNOT(sA,DVD_MAT_LTRIANG)?"U":"L", &rC, &cC, &one,
158:               (PetscScalar*)A, &_ldA, (PetscScalar*)B, &_ldB, &zero, C, &_ldC);
159:     PetscLogFlops(rA*cB*cA);
160:     PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);
161:     return(0);
162:   }

164:   /* Optimized versions: B hermitian && (A not triang) */
165:   if (DVD_IS(sB,DVD_MAT_HERMITIAN) &&
166:       DVD_ISNOT(sA,DVD_MAT_UTRIANG) &&
167:       DVD_ISNOT(sA,DVD_MAT_LTRIANG)    ) {
168:     PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);
169:     rC = rA; cC = cB;
170:     BLASsymm_("R", DVD_ISNOT(sB,DVD_MAT_LTRIANG)?"U":"L", &rC, &cC, &one,
171:               (PetscScalar*)B, &_ldB, (PetscScalar*)A, &_ldA, &zero, C, &_ldC);
172:     PetscLogFlops(rA*cB*cA);
173:     PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);
174:     return(0);
175:   }
176: 
177:   SETERRQ(PETSC_COMM_SELF,1, "Matrix type not supported for A");
178: }

182: /*
183:   Normalize the columns of the matrix A, where
184:     ldA, the leading dimension of A,
185:     rA, cA, rows and columns of A.
186:   if eigi is given, the pairs of contiguous columns i i+1 such as eigi[i] != 0
187:   are normalized as being one column.
188: */
189: PetscErrorCode SlepcDenseNorm(PetscScalar *A, PetscInt ldA, PetscInt _rA,
190:                               PetscInt cA, PetscScalar *eigi)
191: {
192:   PetscErrorCode  ierr;
193:   PetscInt        i;
194:   PetscScalar     norm, norm0;
195:   PetscBLASInt    rA = _rA, one=1;


201:   PetscLogEventBegin(SLEPC_SlepcDenseNorm,0,0,0,0);

203:   for(i=0; i<cA; i++) {
204:     if(eigi && eigi[i] != 0.0) {
205:       norm = BLASnrm2_(&rA, &A[i*ldA], &one);
206:       norm0 = BLASnrm2_(&rA, &A[(i+1)*ldA], &one);
207:       norm = 1.0/PetscSqrtScalar(norm*norm + norm0*norm0);
208:       BLASscal_(&rA, &norm, &A[i*ldA], &one);
209:       BLASscal_(&rA, &norm, &A[(i+1)*ldA], &one);
210:       i++;
211:     } else {
212:       norm = BLASnrm2_(&rA, &A[i*ldA], &one);
213:       norm = 1.0 / norm;
214:       BLASscal_(&rA, &norm, &A[i*ldA], &one);
215:      }
216:   }

218:   PetscLogEventEnd(SLEPC_SlepcDenseNorm,0,0,0,0);

220:   return(0);
221: }

225: /*
226:   Y <- X, where
227:   ldX, leading dimension of X,
228:   rX, cX, rows and columns of X
229:   ldY, leading dimension of Y
230: */
231: PetscErrorCode SlepcDenseCopy(PetscScalar *Y, PetscInt ldY, PetscScalar *X,
232:                               PetscInt ldX, PetscInt rX, PetscInt cX)
233: {
234:   PetscErrorCode  ierr;
235:   PetscInt        i;


241:   if ((ldX < rX) || (ldY < rX)) SETERRQ(PETSC_COMM_SELF,1, "Leading dimension error");
242: 
243:   /* Quick exit */
244:   if (Y == X) {
245:     if (ldX != ldY) SETERRQ(PETSC_COMM_SELF,1, "Leading dimension error");
246:     return(0);
247:   }

249:   PetscLogEventBegin(SLEPC_SlepcDenseCopy,0,0,0,0);
250:   for(i=0; i<cX; i++) {
251:     PetscMemcpy(&Y[ldY*i], &X[ldX*i], sizeof(PetscScalar)*rX);
252: 
253:   }
254:   PetscLogEventEnd(SLEPC_SlepcDenseCopy,0,0,0,0);

256:   return(0);
257: }

261: /*
262:   Y <- X, where
263:   ldX, leading dimension of X,
264:   rX, cX, rows and columns of X
265:   ldY, leading dimension of Y
266: */
267: PetscErrorCode SlepcDenseCopyTriang(PetscScalar *Y, MatType_t sY, PetscInt ldY,
268:                                     PetscScalar *X, MatType_t sX, PetscInt ldX,
269:                                     PetscInt rX, PetscInt cX)
270: {
271:   PetscErrorCode  ierr;
272:   PetscInt        i,j,c;


278:   if ((ldX < rX) || (ldY < rX)) SETERRQ(PETSC_COMM_SELF,1, "Leading dimension error");

280:   if (sY == 0 && sX == 0) {
281:     SlepcDenseCopy(Y, ldY, X, ldX, rX, cX);
282:     return(0);
283:   }

285:   if (rX != cX) SETERRQ(PETSC_COMM_SELF,1, "Rectangular matrices not supported");

287:   if (DVD_IS(sX,DVD_MAT_UTRIANG) &&
288:       DVD_ISNOT(sX,DVD_MAT_LTRIANG)) {        /* UpTr to ... */
289:     if (DVD_IS(sY,DVD_MAT_UTRIANG) &&
290:         DVD_ISNOT(sY,DVD_MAT_LTRIANG))        /* ... UpTr, */
291:       c = 0;                                      /*     so copy */
292:     else if(DVD_ISNOT(sY,DVD_MAT_UTRIANG) &&
293:             DVD_IS(sY,DVD_MAT_LTRIANG))       /* ... LoTr, */
294:       c = 1;                                      /*     so transpose */
295:     else                                          /* ... Full, */
296:       c = 2;                                      /*     so reflect from up */
297:   } else if (DVD_ISNOT(sX,DVD_MAT_UTRIANG) &&
298:              DVD_IS(sX,DVD_MAT_LTRIANG)) {    /* LoTr to ... */
299:     if (DVD_IS(sY,DVD_MAT_UTRIANG) &&
300:         DVD_ISNOT(sY,DVD_MAT_LTRIANG))        /* ... UpTr, */
301:       c = 1;                                      /*     so transpose */
302:     else if(DVD_ISNOT(sY,DVD_MAT_UTRIANG) &&
303:             DVD_IS(sY,DVD_MAT_LTRIANG))       /* ... LoTr, */
304:       c = 0;                                      /*     so copy */
305:     else                                          /* ... Full, */
306:       c = 3;                                      /*     so reflect fr. down */
307:   } else                                          /* Full to any, */
308:     c = 0;                                        /*     so copy */
309: 
310:   PetscLogEventBegin(SLEPC_SlepcDenseCopy,0,0,0,0);

312:   switch(c) {
313:   case 0: /* copy */
314:     for(i=0; i<cX; i++) {
315:       PetscMemcpy(&Y[ldY*i], &X[ldX*i], sizeof(PetscScalar)*rX);
316: 
317:     }
318:     break;

320:   case 1: /* transpose */
321:     for(i=0; i<cX; i++)
322:       for(j=0; j<rX; j++)
323:         Y[ldY*j+i] = PetscConj(X[ldX*i+j]);
324:     break;

326:   case 2: /* reflection from up */
327:     for(i=0; i<cX; i++)
328:       for(j=0; j<PetscMin(i+1,rX); j++)
329:         Y[ldY*j+i] = PetscConj(Y[ldY*i+j] = X[ldX*i+j]);
330:     break;

332:   case 3: /* reflection from down */
333:     for(i=0; i<cX; i++)
334:       for(j=i; j<rX; j++)
335:         Y[ldY*j+i] = PetscConj(Y[ldY*i+j] = X[ldX*i+j]);
336:     break;
337:   }
338:   PetscLogEventEnd(SLEPC_SlepcDenseCopy,0,0,0,0);

340:   return(0);
341: }


346: /*
347:   Compute Y[0..cM-1] <- alpha * X[0..cX-1] * M + beta * Y[0..cM-1],
348:   where X and Y are contiguous global vectors.
349: */
350: PetscErrorCode SlepcUpdateVectorsZ(Vec *Y, PetscScalar beta, PetscScalar alpha,
351:   Vec *X, PetscInt cX, const PetscScalar *M, PetscInt ldM, PetscInt rM,
352:   PetscInt cM)
353: {
354:   PetscErrorCode  ierr;


358:   SlepcUpdateVectorsS(Y, 1, beta, alpha, X, cX, 1, M, ldM, rM, cM);
359: 

361:   return(0);
362: }


367: /*
368:   Compute Y[0:dY:cM*dY-1] <- alpha * X[0:dX:cX-1] * M + beta * Y[0:dY:cM*dY-1],
369:   where X and Y are contiguous global vectors.
370: */
371: PetscErrorCode SlepcUpdateVectorsS(Vec *Y, PetscInt dY, PetscScalar beta,
372:   PetscScalar alpha, Vec *X, PetscInt cX, PetscInt dX, const PetscScalar *M,
373:   PetscInt ldM, PetscInt rM, PetscInt cM)
374: {
375:   PetscErrorCode    ierr;
376:   const PetscScalar *px;
377:   PetscScalar       *py;
378:   PetscInt          rX, rY, ldX, ldY, i, rcX;

381:   SlepcValidVecsContiguous(Y,cM*dY,1);
382:   SlepcValidVecsContiguous(X,cX,5);

385:   /* Compute the real number of columns */
386:   rcX = cX/dX;
387:   if (rcX != rM) SETERRQ(((PetscObject)*Y)->comm,1, "Matrix dimensions do not match");

389:   if ((rcX == 0) || (rM == 0) || (cM == 0)) {
390:     return(0);
391:   } else if ((Y + cM <= X) || (X + cX <= Y) ||
392:              ((X != Y) && ((PetscMax(dX,dY))%(PetscMin(dX,dY))!=0))) {
393:     /* If Y[0..cM-1] and X[0..cX-1] are not overlapped... */

395:     /* Get the dense matrices and dimensions associated to Y and X */
396:     VecGetLocalSize(X[0], &rX);
397:     VecGetLocalSize(Y[0], &rY);
398:     if (rX != rY) SETERRQ(((PetscObject)*Y)->comm,1, "The multivectors do not have the same dimension");
399:     VecGetArrayRead(X[0], &px);
400:     VecGetArray(Y[0], &py);

402:     /* Update the strides */
403:     ldX = rX*dX; ldY= rY*dY;

405:     /* Do operation */
406:     SlepcDenseMatProd(py, ldY, beta, alpha, px, ldX, rX, rcX,
407:                     PETSC_FALSE, M, ldM, rM, cM, PETSC_FALSE);
408: 
409:     VecRestoreArrayRead(X[0], &px);
410:     VecRestoreArray(Y[0], &py);
411:     for(i=1; i<cM; i++) {
412:       PetscObjectStateIncrease((PetscObject)Y[dY*i]);
413:     }

415:   } else if ((Y >= X) && (beta == 0.0) && (dY == dX)) {
416:     /* If not, call to SlepcUpdateVectors */
417:     SlepcUpdateStrideVectors(cX, X, Y-X, dX, Y-X+cM*dX, M-ldM*(Y-X),
418:                                     ldM, PETSC_FALSE);
419:     if (alpha != 1.0)
420:       for (i=0; i<cM; i++) {
421:         VecScale(Y[i], alpha);
422:       }
423:   } else SETERRQ(((PetscObject)*Y)->comm,1, "Unsupported case");
424:   return(0);
425: }

429: /*
430:   Compute X <- alpha * X[0:dX:cX-1] * M
431:   where X is a matrix with non-consecutive columns
432: */
433: PetscErrorCode SlepcUpdateVectorsD(Vec *X, PetscInt cX, PetscScalar alpha,
434:   const PetscScalar *M, PetscInt ldM, PetscInt rM, PetscInt cM,
435:   PetscScalar *work, PetscInt lwork)
436: {
438:   PetscScalar    **px, *Y, *Z;
439:   PetscInt       rX, i, j, rY, rY0, ldY;

442:   SlepcValidVecsContiguous(X,cX,1);

446:   if (cX != rM) SETERRQ(((PetscObject)*X)->comm,1, "Matrix dimensions do not match");

448:   rY = (lwork/2)/cX;
449:   if (rY <= 0) SETERRQ(((PetscObject)*X)->comm,1, "Insufficient work space given");
450:   Y = work; Z = &Y[cX*rY]; ldY = rY;

452:   if ((cX == 0) || (rM == 0) || (cM == 0)) {
453:     return(0);
454:   }

456:   /* Get the dense vectors associated to the columns of X */
457:   PetscMalloc(sizeof(Vec)*cX, &px);
458:   for(i=0; i<cX; i++) {
459:     VecGetArray(X[i], &px[i]);
460:   }
461:   VecGetLocalSize(X[0], &rX);

463:   for(i=0, rY0=0; i<rX; i+=rY0) {
464:     rY0 = PetscMin(rY, rX-i);

466:     /* Y <- X[i0:i1,:] */
467:     for(j=0; j<cX; j++) {
468:       SlepcDenseCopy(&Y[ldY*j], ldY, px[j]+i, rX, rY0, 1);
469: 
470:     }

472:     /* Z <- Y * M */
473:     SlepcDenseMatProd(Z, ldY, 0.0, alpha, Y, ldY, rY0, cX, PETSC_FALSE,
474:                                                  M, ldM, rM, cM, PETSC_FALSE);
475: 

477:     /* X <- Z */
478:     for(j=0; j<cM; j++) {
479:       SlepcDenseCopy(px[j]+i, rX, &Z[j*ldY], ldY, rY0, 1);
480: 
481:     }
482:   }

484:   for(i=0; i<cX; i++) {
485:     VecRestoreArray(X[i], &px[i]);
486:   }
487:   PetscFree(px);

489:   return(0);
490: }



496: /* Computes M <- [ M(0:sU-1,  0:sV-1) W(0:sU-1,  sV:eV-1) ]
497:                  [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
498:   where W = U' * V.
499:   workS0 and workS1 are an auxiliary scalar vector of size
500:   (eU-sU)*sV*(sU!=0)+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
501:   is needed, and of size eU*eV.
502: */
503: PetscErrorCode VecsMult(PetscScalar *M, MatType_t sM, PetscInt ldM,
504:                         Vec *U, PetscInt sU, PetscInt eU,
505:                         Vec *V, PetscInt sV, PetscInt eV,
506:                         PetscScalar *workS0, PetscScalar *workS1)
507: {
508:   PetscErrorCode    ierr;
509:   PetscInt          ldU, ldV, i, j, k, ms = (eU-sU)*sV*(sU==0?0:1)+(eV-sV)*eU;
510:   const PetscScalar *pu, *pv;
511:   PetscScalar       *W, *Wr;


515:   /* Check if quick exit */
516:   if ((eU-sU == 0) || (eV-sV == 0))
517:     return(0);

519:   SlepcValidVecsContiguous(U,eU,4);
520:   SlepcValidVecsContiguous(V,eV,7);
522: 
523:   /* Get the dense matrices and dimensions associated to U and V */
524:   VecGetLocalSize(U[0], &ldU);
525:   VecGetLocalSize(V[0], &ldV);
526:   if (ldU != ldV) SETERRQ(((PetscObject)*U)->comm,1, "Matrix dimensions do not match");
527:   VecGetArrayRead(U[0], &pu);
528:   VecGetArrayRead(V[0], &pv);

530:   if (workS0) {
532:     W = workS0;
533:   } else {
534:     PetscMalloc(sizeof(PetscScalar)*ms, &W);
535: 
536:   }

538:   PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);

540:   if ((sU == 0) && (sV == 0) && (eU == ldM)) {
541:     /* Use the smart memory usage version */

543:     /* W <- U' * V */
544:     SlepcDenseMatProdTriang(W, sM, eU,
545:                                    pu, 0, ldU, ldU, eU, PETSC_TRUE,
546:                                    pv, 0, ldV, ldV, eV, PETSC_FALSE);
547: 
548: 
549:     /* ReduceAll(W, SUM) */
550:     MPI_Allreduce(W, M, eU*eV, MPIU_SCALAR, MPIU_SUM,
551:                          ((PetscObject)U[0])->comm);
552:   /* Full M matrix */
553:   } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
554:              DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
555:     if (workS1) {
557:       Wr = workS1;
558:       if (PetscAbs(PetscMin(W-workS1, workS1-W)) < ms) SETERRQ(PETSC_COMM_SELF,1, "Consistency broken");
559:     } else {
560:       PetscMalloc(sizeof(PetscScalar)*ms, &Wr);
561: 
562:     }
563: 
564:     /* W(0:(eU-sU)*sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
565:     if (sU > 0) {
566:       SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
567:                                pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
568:                                pv       , ldV, ldV, sV,    PETSC_FALSE);
569: 
570:     }
571: 
572:     /* W((eU-sU)*sV:(eU-sU)*sV+(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
573:     SlepcDenseMatProd(W+(eU-sU)*sV*(sU > 0?1:0), eU, 0.0, 1.0,
574:                              pu,        ldU, ldU, eU,    PETSC_TRUE,
575:                              pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
576: 
577: 
578:     /* ReduceAll(W, SUM) */
579:     MPI_Allreduce(W, Wr, ms, MPIU_SCALAR,
580:                       MPIU_SUM, ((PetscObject)U[0])->comm);
581: 
582:     /* M(...,...) <- W */
583:     k = 0;
584:     if (sU > 0) for (i=0; i<sV; i++)
585:       for (j=ldM*i+sU; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
586:     for (i=sV; i<eV; i++)
587:       for (j=ldM*i; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
588: 
589:     if (!workS1) {
590:       PetscFree(Wr);
591:     }

593:   /* Upper triangular M matrix */
594:   } else if (DVD_IS(sM,DVD_MAT_UTRIANG) &&
595:              DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
596:     if (workS1) {
598:       Wr = workS1;
599:       if (PetscAbs(PetscMin(W-workS1,workS1-W)) < (eV-sV)*eU) SETERRQ(PETSC_COMM_SELF,1, "Consistency broken");
600:     } else {
601:       PetscMalloc(sizeof(PetscScalar)*(eV-sV)*eU, &Wr);
602: 
603:     }
604: 
605:     /* W(0:(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
606:     SlepcDenseMatProd(W,         eU,  0.0, 1.0,
607:                              pu,        ldU, ldU, eU,    PETSC_TRUE,
608:                              pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
609: 
610: 
611:     /* ReduceAll(W, SUM) */
612:     MPI_Allreduce(W, Wr, (eV-sV)*eU, MPIU_SCALAR, MPIU_SUM,
613:                          ((PetscObject)U[0])->comm);
614: 
615:     /* M(...,...) <- W */
616:     for (i=sV,k=0; i<eV; i++)
617:         for (j=ldM*i; j<ldM*i+eU; j++,k++) M[j] = Wr[k];

619:     if (!workS1) {
620:       PetscFree(Wr);
621:     }

623:   /* Lower triangular M matrix */
624:   } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
625:              DVD_IS(sM,DVD_MAT_LTRIANG)) {
626:     if (workS1) {
628:       Wr = workS1;
629:       if (PetscMin(W - workS1, workS1 - W) < (eU-sU)*eV) SETERRQ(PETSC_COMM_SELF,1, "Consistency broken");
630:     } else {
631:       PetscMalloc(sizeof(PetscScalar)*(eU-sU)*eV, &Wr);
632:     }
633: 
634:     /* W(0:(eU-sU)*eV-1) <- U(sU:eU-1)' * V(0:eV-1) */
635:     SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
636:                              pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
637:                              pv       , ldV, ldV, eV,    PETSC_FALSE);
638: 
639: 
640:     /* ReduceAll(W, SUM) */
641:     MPI_Allreduce(W, Wr, (eU-sU)*eV, MPIU_SCALAR, MPIU_SUM,
642:                          ((PetscObject)U[0])->comm);
643: 
644:     /* M(...,...) <- W */
645:     for (i=0,k=0; i<eV; i++)
646:       for (j=ldM*i+sU; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
647: 
648:     if (!workS1) {
649:       PetscFree(Wr);
650:     }
651:   }

653:   PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);

655:   if (!workS0) {
656:     PetscFree(W);
657:   }

659:   VecRestoreArrayRead(U[0], &pu);
660:   VecRestoreArrayRead(V[0], &pv);
661:   return(0);
662: }


667: /* Computes M <- [ M(0:sU-1,  0:sV-1) W(0:sU-1,  sV:eV-1) ]
668:                  [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
669:   where W = local_U' * local_V. Needs VecsMultIb for completing the operation!
670:   workS0 and workS1 are an auxiliary scalar vector of size
671:   (eU-sU)*sV+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
672:   is needed, and of size eU*eV.
673: */
674: PetscErrorCode VecsMultIa(PetscScalar *M, MatType_t sM, PetscInt ldM,
675:                           Vec *U, PetscInt sU, PetscInt eU,
676:                           Vec *V, PetscInt sV, PetscInt eV)
677: {
678:   PetscErrorCode  ierr;
679:   PetscInt        ldU, ldV;
680:   PetscScalar     *pu, *pv;


684:   /* Check if quick exit */
685:   if ((eU-sU == 0) || (eV-sV == 0))
686:     return(0);
687: 
688:   SlepcValidVecsContiguous(U,eU,4);
689:   SlepcValidVecsContiguous(V,eV,7);

692:   /* Get the dense matrices and dimensions associated to U and V */
693:   VecGetLocalSize(U[0], &ldU);
694:   VecGetLocalSize(V[0], &ldV);
695:   if (ldU != ldV) SETERRQ(((PetscObject)*U)->comm,1, "Matrix dimensions do not match");
696:   VecGetArray(U[0], &pu);
697:   VecGetArray(V[0], &pv);

699:   if ((sU == 0) && (sV == 0) && (eU == ldM)) {
700:     /* M <- local_U' * local_V */
701:     SlepcDenseMatProdTriang(M, sM, eU,
702:                                    pu, 0, ldU, ldU, eU, PETSC_TRUE,
703:                                    pv, 0, ldV, ldV, eV, PETSC_FALSE);
704: 
705: 
706:   /* Full M matrix */
707:   } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
708:              DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
709:     /* M(sU:eU-1,0:sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
710:     SlepcDenseMatProd(&M[sU], ldM, 0.0, 1.0,
711:                              pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
712:                              pv       , ldV, ldV, sV,    PETSC_FALSE);
713: 
714: 
715:     /* M(0:eU-1,sV:eV-1) <- U(0:eU-1)' * V(sV:eV-1) */
716:     SlepcDenseMatProd(&M[ldM*sV], ldM, 0.0, 1.0,
717:                              pu,        ldU, ldU, eU,    PETSC_TRUE,
718:                              pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
719: 
720: 
721:   /* Other structures */
722:   } else SETERRQ(((PetscObject)*U)->comm,1, "Matrix structure not supported");

724:   VecRestoreArray(U[0], &pu);
725:   PetscObjectStateDecrease((PetscObject)U[0]);
726:   VecRestoreArray(V[0], &pv);
727:   PetscObjectStateDecrease((PetscObject)V[0]);

729:   return(0);
730: }


735: /* Computes M <- nprocs*M
736:   where nprocs is the number of processors.
737: */
738: PetscErrorCode VecsMultIc(PetscScalar *M, MatType_t sM, PetscInt ldM,
739:                           PetscInt rM, PetscInt cM, Vec V)
740: {
741:   int        i,j,n;


745:   /* Check if quick exit */
746:   if ((rM == 0) || (cM == 0))
747:     return(0);
749: 
750:   if (sM != 0) SETERRQ(((PetscObject)V)->comm,1, "Matrix structure not supported");

752:   MPI_Comm_size(((PetscObject)V)->comm, &n);

754:   for(i=0; i<cM; i++)
755:     for(j=0; j<rM; j++)
756:       M[ldM*i+j]/= (PetscScalar)n;

758:   return(0);
759: }


764: /* Computes N <- Allreduce( [ M(0:sU-1,  0:sV-1) W(0:sU-1,  sV:eV-1) ] )
765:                           ( [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ] )
766:   where W = U' * V.
767:   workS0 and workS1 are an auxiliary scalar vector of size
768:   (eU-sU)*sV+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
769:   is needed, and of size eU*eV.
770: */
771: PetscErrorCode VecsMultIb(PetscScalar *M, MatType_t sM, PetscInt ldM,
772:                           PetscInt rM, PetscInt cM, PetscScalar *auxS,
773:                           Vec V)
774: {
775:   PetscErrorCode  ierr;
776:   PetscScalar     *W, *Wr;


780:   /* Check if quick exit */
781:   if ((rM == 0) || (cM == 0))
782:     return(0);
784: 
785:   if (auxS) {
787:     W = auxS;
788:   } else {
789:     PetscMalloc(sizeof(PetscScalar)*rM*cM*2, &W);
790: 
791:   }
792:   Wr = W + rM*cM;

794:   PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);

796:   if (sM == 0) {
797:     /* W <- M */
798:     SlepcDenseCopy(W, rM, M, ldM, rM, cM);

800:     /* Wr <- ReduceAll(W, SUM) */
801:     MPI_Allreduce(W, Wr, rM*cM, MPIU_SCALAR, MPIU_SUM,
802:                          ((PetscObject)V)->comm);

804:     /* M <- Wr */
805:     SlepcDenseCopy(M, ldM, Wr, rM, rM, cM);

807:   /* Other structures */
808:   } else SETERRQ(((PetscObject)V)->comm,1, "Matrix structure not supported");

810:   PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);

812:   if (!auxS) {
813:     PetscFree(W);
814:   }

816:   return(0);
817: }


822: /* Computes M <- [ M(0:sU-1,  0:sV-1) W(0:sU-1,  sV:eV-1) ]
823:                  [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
824:   where W = U' * V.
825:   r, a DvdReduction structure,
826:   sr, an structure DvdMult_copy_func.
827: */
828: PetscErrorCode VecsMultS(PetscScalar *M, MatType_t sM, PetscInt ldM,
829:                          Vec *U, PetscInt sU, PetscInt eU,
830:                          Vec *V, PetscInt sV, PetscInt eV, DvdReduction *r,
831:                          DvdMult_copy_func *sr)
832: {
833:   PetscErrorCode    ierr;
834:   PetscInt          ldU, ldV, ms = (eU-sU)*sV*(sU==0?0:1)+(eV-sV)*eU;
835:   const PetscScalar *pu, *pv;
836:   PetscScalar       *W;


840:   /* Check if quick exit */
841:   if ((eU-sU == 0) || (eV-sV == 0))
842:     return(0);
843: 
844:   SlepcValidVecsContiguous(U,eU,4);
845:   SlepcValidVecsContiguous(V,eV,7);

848:   /* Get the dense matrices and dimensions associated to U and V */
849:   VecGetLocalSize(U[0], &ldU);
850:   VecGetLocalSize(V[0], &ldV);
851:   if (ldU != ldV) SETERRQ(((PetscObject)*U)->comm,1, "Matrix dimensions do not match");
852:   VecGetArrayRead(U[0], &pu);
853:   VecGetArrayRead(V[0], &pv);

855:   PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);

857:   if ((sU == 0) && (sV == 0)) {
858:     /* Use the smart memory usage version */

860:     /* Add the reduction to r */
861:     SlepcAllReduceSum(r, eU*eV, VecsMultS_copy_func, sr, &W);
862: 

864:     /* W <- U' * V */
865:     SlepcDenseMatProdTriang(W, sM, eU,
866:                                    pu, 0, ldU, ldU, eU, PETSC_TRUE,
867:                                    pv, 0, ldV, ldV, eV, PETSC_FALSE);
868: 
869: 
870:     /* M <- ReduceAll(W, SUM) */
871:     sr->M = M;    sr->ld = ldM;
872:     sr->i0 = 0;   sr->i1 = eV;    sr->s0 = sU;    sr->e0 = eU;
873:                   sr->i2 = eV;

875:   /* Full M matrix */
876:   } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
877:              DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
878:     /* Add the reduction to r */
879:     SlepcAllReduceSum(r, ms, VecsMultS_copy_func, sr, &W);
880: 

882:     /* W(0:(eU-sU)*sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
883:     SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
884:                              pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
885:                              pv       , ldV, ldV, sV,    PETSC_FALSE);
886: 
887: 
888:     /* W((eU-sU)*sV:(eU-sU)*sV+(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
889:     SlepcDenseMatProd(W+(eU-sU)*sV*(sU > 0?1:0), eU, 0.0, 1.0,
890:                              pu,        ldU, ldU, eU,    PETSC_TRUE,
891:                              pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
892: 
893: 
894:     /* M <- ReduceAll(W, SUM) */
895:     sr->M = M;            sr->ld = ldM;
896:     sr->i0 = sU>0?0:sV;   sr->i1 = sV;    sr->s0 = sU;    sr->e0 = eU;
897:                           sr->i2 = eV;    sr->s1 = 0;     sr->e1 = eU;

899:   /* Upper triangular M matrix */
900:   } else if (DVD_IS(sM,DVD_MAT_UTRIANG) &&
901:              DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
902:     /* Add the reduction to r */
903:     SlepcAllReduceSum(r, (eV-sV)*eU, VecsMultS_copy_func, sr, &W);
904: 
905: 
906:     /* W(0:(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
907:     SlepcDenseMatProd(W,         eU,  0.0, 1.0,
908:                              pu,        ldU, ldU, eU,    PETSC_TRUE,
909:                              pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
910: 
911: 
912:     /* M <- ReduceAll(W, SUM) */
913:     sr->M = M;    sr->ld = ldM;
914:     sr->i0 = sV;  sr->i1 = eV;    sr->s0 = 0;     sr->e0 = eU;
915:                   sr->i2 = eV;
916: 
917:   /* Lower triangular M matrix */
918:   } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
919:              DVD_IS(sM,DVD_MAT_LTRIANG)) {
920:     /* Add the reduction to r */
921:     SlepcAllReduceSum(r, (eU-sU)*eV, VecsMultS_copy_func, sr, &W);
922: 
923: 
924:     /* W(0:(eU-sU)*eV-1) <- U(sU:eU-1)' * V(0:eV-1) */
925:     SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
926:                              pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
927:                              pv       , ldV, ldV, eV,    PETSC_FALSE);
928: 
929: 
930:     /* ReduceAll(W, SUM) */
931:     sr->M = M;    sr->ld = ldM;
932:     sr->i0 = 0;   sr->i1 = eV;    sr->s0 = sU;    sr->e0 = eU;
933:                   sr->i2 = eV;
934:   }

936:   PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);

938:   VecRestoreArrayRead(U[0], &pu);
939:   VecRestoreArrayRead(V[0], &pv);
940:   return(0);
941: }

945: PetscErrorCode VecsMultS_copy_func(PetscScalar *out, PetscInt size_out,
946:                                    void *ptr)
947: {
948:   PetscInt        i, j, k;
949:   DvdMult_copy_func
950:                   *sr = (DvdMult_copy_func*)ptr;


955:   for (i=sr->i0,k=0; i<sr->i1; i++)
956:     for (j=sr->ld*i+sr->s0; j<sr->ld*i+sr->e0; j++,k++) sr->M[j] = out[k];
957:   for (i=sr->i1; i<sr->i2; i++)
958:     for (j=sr->ld*i+sr->s1; j<sr->ld*i+sr->e1; j++,k++) sr->M[j] = out[k];

960:   if (k != size_out) SETERRQ(PETSC_COMM_SELF,1, "Wrong size");

962:   return(0);
963: }

967: /* Orthonormalize a chunk of parallel vector.
968:    NOTE: wS0 and wS1 must be of size n*n.
969: */
970: PetscErrorCode VecsOrthonormalize(Vec *V, PetscInt n, PetscScalar *wS0,
971:                                   PetscScalar *wS1)
972: {
973: #if defined(PETSC_MISSING_LAPACK_PBTRF)
975:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PBTRF - Lapack routine is unavailable");
976: #else
977:   PetscErrorCode  ierr;
978:   PetscBLASInt    nn = n, info, ld;
979:   PetscInt        ldV, i;
980:   PetscScalar     *H, *T, one=1.0, *pv;
981: 

984:   if (!wS0) {
985:     PetscMalloc(sizeof(PetscScalar)*n*n, &H);
986:   } else {
988:     H = wS0;
989:   }
990:   if (!wS1) {
991:     PetscMalloc(sizeof(PetscScalar)*n*n, &T);
992:   } else {
994:     T = wS1;
995:   }

997:   /* H <- V' * V */
998:   VecsMult(H, 0, n, V, 0, n, V, 0, n, T, PETSC_NULL);

1000:   /* H <- chol(H) */
1001:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1002:   LAPACKpbtrf_("U", &nn, &nn, H, &nn, &info);
1003:   PetscFPTrapPop();
1004:   if (info) SETERRQ1(((PetscObject)*V)->comm,PETSC_ERR_LIB, "Error in Lapack PBTRF %d", info);

1006:   /* V <- V * inv(H) */
1007:   VecGetLocalSize(V[0], &ldV);
1008:   VecGetArray(V[0], &pv);
1009:   ld = ldV;
1010:   BLAStrsm_("R", "U", "N", "N", &ld, &nn, &one, H, &nn, pv, &ld);
1011:   VecRestoreArray(V[0], &pv);
1012:   for(i=1; i<n; i++) {
1013:     PetscObjectStateIncrease((PetscObject)V[i]);
1014:   }

1016:   if (!wS0) {
1017:     PetscFree(H);
1018:   }
1019:   if (!wS1) {
1020:     PetscFree(T);
1021:   }
1022: 
1023:   return(0);
1024: #endif
1025: }

1029: /* 
1030:   Sum up several arrays with only one call to MPIReduce.
1031: */
1032: PetscErrorCode SlepcAllReduceSumBegin(DvdReductionChunk *ops,
1033:                                       PetscInt max_size_ops,
1034:                                       PetscScalar *in, PetscScalar *out,
1035:                                       PetscInt max_size_in, DvdReduction *r,
1036:                                       MPI_Comm comm)
1037: {

1042:   r->in = in;
1043:   r->out = out;
1044:   r->size_in = 0;
1045:   r->max_size_in = max_size_in;
1046:   r->ops = ops;
1047:   r->size_ops = 0;
1048:   r->max_size_ops = max_size_ops;
1049:   r->comm = comm;

1051:   return(0);
1052: }

1056: PetscErrorCode SlepcAllReduceSum(DvdReduction *r, PetscInt size_in,
1057:                                  DvdReductionPostF f, void *ptr,
1058:                                  PetscScalar **in)
1059: {
1061:   *in = r->in + r->size_in;
1062:   r->ops[r->size_ops].out = r->out + r->size_in;
1063:   r->ops[r->size_ops].size_out = size_in;
1064:   r->ops[r->size_ops].f = f;
1065:   r->ops[r->size_ops].ptr = ptr;
1066:   if (++(r->size_ops) > r->max_size_ops) SETERRQ(PETSC_COMM_SELF,1, "max_size_ops is not large enough");
1067:   if ((r->size_in+= size_in) > r->max_size_in) SETERRQ(PETSC_COMM_SELF,1, "max_size_in is not large enough");
1068:   return(0);
1069: }


1074: PetscErrorCode SlepcAllReduceSumEnd(DvdReduction *r)
1075: {
1076:   PetscErrorCode  ierr;
1077:   PetscInt        i;


1081:   /* Check if quick exit */
1082:   if (r->size_ops == 0)
1083:     return(0);

1085:   /* Call the MPIAllReduce routine */
1086:   MPI_Allreduce(r->in, r->out, r->size_in, MPIU_SCALAR, MPIU_SUM,
1087:                        r->comm);

1089:   /* Call the postponed routines */
1090:   for(i=0; i<r->size_ops; i++) {
1091:     r->ops[i].f(r->ops[i].out, r->ops[i].size_out, r->ops[i].ptr);
1092: 
1093:   }

1095:   /* Tag the operation as done */
1096:   r->size_ops = 0;

1098:   return(0);
1099: }


1104: /* auxS: size_cX+V_new_e */
1105: PetscErrorCode dvd_orthV(IP ip, Vec *defl, PetscInt size_DS, Vec *cX,
1106:                          PetscInt size_cX, Vec *V, PetscInt V_new_s,
1107:                          PetscInt V_new_e, PetscScalar *auxS,
1108:                          PetscRandom rand)
1109: {
1110:   PetscErrorCode  ierr;
1111:   PetscInt        i, j;
1112:   PetscBool       lindep;
1113:   PetscReal       norm;
1114:   PetscScalar     *auxS0 = auxS;
1115: 
1117: 
1118:   /* Orthonormalize V with IP */
1119:   for (i=V_new_s; i<V_new_e; i++) {
1120:     for(j=0; j<3; j++) {
1121:       if (j>0) { SlepcVecSetRandom(V[i], rand);  }
1122:       if (cX + size_cX == V) {
1123:         /* If cX and V are contiguous, orthogonalize in one step */
1124:         IPOrthogonalize(ip, size_DS, defl, size_cX+i, PETSC_NULL, cX,
1125:                                V[i], auxS0, &norm, &lindep);
1126:       } else if (defl) {
1127:         /* Else orthogonalize first against defl, and then against cX and V */
1128:         IPOrthogonalize(ip, size_DS, defl, size_cX, PETSC_NULL, cX,
1129:                                V[i], auxS0, PETSC_NULL, &lindep);
1130:         if(!lindep) {
1131:           IPOrthogonalize(ip, 0, PETSC_NULL, i, PETSC_NULL, V,
1132:                                  V[i], auxS0, &norm, &lindep);
1133:         }
1134:       } else {
1135:         /* Else orthogonalize first against cX and then against V */
1136:         IPOrthogonalize(ip, size_cX, cX, i, PETSC_NULL, V,
1137:                                V[i], auxS0, &norm, &lindep);
1138:       }
1139:       if(!lindep && (norm > PETSC_SQRT_MACHINE_EPSILON)) break;
1140:       PetscInfo1(ip,"Orthonormalization problems adding the vector %D to the searching subspace\n",i);
1141:     }
1142:     if(lindep || (norm < PETSC_SQRT_MACHINE_EPSILON)) SETERRQ(((PetscObject)ip)->comm,1, "Error during orthonormalization of eigenvectors");
1143:     VecScale(V[i], 1.0/norm);
1144:   }
1145: 
1146:   return(0);
1147: }


1152: /* auxS: size_cX+V_new_e+1 */
1153: PetscErrorCode dvd_BorthV_faster(IP ip, Vec *defl, Vec *BDS,PetscReal *BDSn, PetscInt size_DS, Vec *cX,
1154:                          Vec *BcX, PetscReal *BcXn, PetscInt size_cX, Vec *V, Vec *BV,PetscReal *BVn,
1155:                          PetscInt V_new_s, PetscInt V_new_e,
1156:                          PetscScalar *auxS, PetscRandom rand)
1157: {
1158:   PetscErrorCode  ierr;
1159:   PetscInt        i, j;
1160:   PetscBool       lindep;
1161:   PetscReal       norm;
1162:   PetscScalar     *auxS0 = auxS;
1163: 
1165: 
1166:   /* Orthonormalize V with IP */
1167:   for (i=V_new_s; i<V_new_e; i++) {
1168:     for(j=0; j<3; j++) {
1169:       if (j>0) { SlepcVecSetRandom(V[i], rand);  }
1170:       if (cX + size_cX == V && BcX + size_cX == BV) {
1171:         /* If cX and V are contiguous, orthogonalize in one step */
1172:         IPBOrthogonalize(ip, size_DS, defl, BDS, BDSn, size_cX+i, PETSC_NULL, cX, BcX, BcXn,
1173:                                V[i], BV[i], auxS0, &norm, &lindep);
1174:       } else if (defl) {
1175:         /* Else orthogonalize first against defl, and then against cX and V */
1176:         IPBOrthogonalize(ip, size_DS, defl, BDS, BDSn, size_cX, PETSC_NULL, cX, BcX, BcXn,
1177:                                V[i], BV[i], auxS0, PETSC_NULL, &lindep);
1178:         if(!lindep) {
1179:           IPBOrthogonalize(ip, 0, PETSC_NULL, PETSC_NULL, PETSC_NULL, i, PETSC_NULL, V, BV, BVn,
1180:                                   V[i], BV[i], auxS0, &norm, &lindep);
1181:         }
1182:       } else {
1183:         /* Else orthogonalize first against cX and then against V */
1184:         IPBOrthogonalize(ip, size_cX, cX, BcX, BcXn, i, PETSC_NULL, V, BV, BVn,
1185:                                 V[i], BV[i], auxS0, &norm, &lindep);
1186:       }
1187:       if(!lindep && (PetscAbs(norm) > PETSC_SQRT_MACHINE_EPSILON)) break;
1188:       PetscInfo1(ip, "Orthonormalization problems adding the vector %d to the searching subspace\n", i);
1189: 
1190:     }
1191:     if(lindep || (PetscAbs(norm) < PETSC_SQRT_MACHINE_EPSILON)) {
1192:         SETERRQ(((PetscObject)ip)->comm,1, "Error during the orthonormalization of the eigenvectors");
1193:     }
1194:     if (BVn) BVn[i] = norm > 0.0 ? 1.0 : -1.0;
1195:     norm = PetscAbs(norm);
1196:     VecScale(V[i], 1.0/norm);
1197:     VecScale(BV[i], 1.0/norm);
1198:   }
1199: 
1200:   return(0);
1201: }

1205: /* auxS: size_cX+V_new_e+1 */
1206: PetscErrorCode dvd_BorthV_stable(IP ip,Vec *defl,PetscReal *BDSn,PetscInt size_DS,Vec *cX,PetscReal *BcXn,PetscInt size_cX,Vec *V,PetscReal *BVn,PetscInt V_new_s,PetscInt V_new_e,PetscScalar *auxS,PetscRandom rand)
1207: {
1208:   PetscErrorCode  ierr;
1209:   PetscInt        i, j;
1210:   PetscBool       lindep;
1211:   PetscReal       norm;
1212:   PetscScalar     *auxS0 = auxS;
1213: 
1215: 
1216:   /* Orthonormalize V with IP */
1217:   for (i=V_new_s; i<V_new_e; i++) {
1218:     for(j=0; j<3; j++) {
1219:       if (j>0) {
1220:         SlepcVecSetRandom(V[i],rand);
1221:         PetscInfo1(ip,"Orthonormalization problems adding the vector %d to the searching subspace\n",i);
1222:       }
1223:       /* Orthogonalize against the deflation, if needed */
1224:       if (defl) {
1225:         IPPseudoOrthogonalize(ip,size_DS,defl,BDSn,V[i],auxS0,PETSC_NULL,&lindep);
1226:         if (lindep) continue;
1227:       }
1228:       /* If cX and V are contiguous, orthogonalize in one step */
1229:       if (cX + size_cX == V) {
1230:         IPPseudoOrthogonalize(ip,size_cX+i,cX,BcXn,V[i],auxS0,&norm,&lindep);
1231:       /* Else orthogonalize first against cX and then against V */
1232:       } else {
1233:         IPPseudoOrthogonalize(ip,size_cX,cX,BcXn,V[i],auxS0,PETSC_NULL,&lindep);
1234:         if (lindep) continue;
1235:         IPPseudoOrthogonalize(ip,i,V,BVn,V[i],auxS0,&norm,&lindep);
1236:       }
1237:       if(!lindep && (PetscAbs(norm) > PETSC_MACHINE_EPSILON)) break;
1238:     }
1239:     if(lindep || (PetscAbs(norm) < PETSC_MACHINE_EPSILON)) {
1240:         SETERRQ(((PetscObject)ip)->comm,1, "Error during the orthonormalization of the eigenvectors");
1241:     }
1242:     if (BVn) BVn[i] = norm > 0.0 ? 1.0 : -1.0;
1243:     norm = PetscAbs(norm);
1244:     VecScale(V[i], 1.0/norm);
1245:   }
1246: 
1247:   return(0);
1248: }