Actual source code: dvd_blas.c
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2012, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7:
8: SLEPc is free software: you can redistribute it and/or modify it under the
9: terms of version 3 of the GNU Lesser General Public License as published by
10: the Free Software Foundation.
12: SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
13: WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14: FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
15: more details.
17: You should have received a copy of the GNU Lesser General Public License
18: along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
19: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
20: */
22: #include <slepc-private/vecimplslepc.h>
23: #include davidson.h
25: PetscLogEvent SLEPC_SlepcDenseMatProd = 0;
26: PetscLogEvent SLEPC_SlepcDenseNorm = 0;
27: PetscLogEvent SLEPC_SlepcDenseCopy = 0;
28: PetscLogEvent SLEPC_VecsMult = 0;
30: void dvd_sum_local(void *in, void *out, PetscMPIInt *cnt,MPI_Datatype *t);
31: PetscErrorCode VecsMultS_copy_func(PetscScalar *out, PetscInt size_out,
32: void *ptr);
36: /*
37: Compute C <- a*A*B + b*C, where
38: ldC, the leading dimension of C,
39: ldA, the leading dimension of A,
40: rA, cA, rows and columns of A,
41: At, if true use the transpose of A instead,
42: ldB, the leading dimension of B,
43: rB, cB, rows and columns of B,
44: Bt, if true use the transpose of B instead
45: */
46: PetscErrorCode SlepcDenseMatProd(PetscScalar *C, PetscInt _ldC, PetscScalar b,
47: PetscScalar a,
48: const PetscScalar *A, PetscInt _ldA, PetscInt rA, PetscInt cA, PetscBool At,
49: const PetscScalar *B, PetscInt _ldB, PetscInt rB, PetscInt cB, PetscBool Bt)
50: {
51: PetscErrorCode ierr;
52: PetscInt tmp;
53: PetscBLASInt m, n, k, ldA = _ldA, ldB = _ldB, ldC = _ldC;
54: const char *N = "N", *T = "C", *qA = N, *qB = N;
58: if ((rA == 0) || (cB == 0)) { return(0); }
63: PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);
65: /* Transpose if needed */
66: if (At) tmp = rA, rA = cA, cA = tmp, qA = T;
67: if (Bt) tmp = rB, rB = cB, cB = tmp, qB = T;
68:
69: /* Check size */
70: if (cA != rB) SETERRQ(PETSC_COMM_SELF,1, "Matrix dimensions do not match");
71:
72: /* Do stub */
73: if ((rA == 1) && (cA == 1) && (cB == 1)) {
74: if (!At && !Bt) *C = *A * *B;
75: else if (At && !Bt) *C = PetscConj(*A) * *B;
76: else if (!At && Bt) *C = *A * PetscConj(*B);
77: else *C = PetscConj(*A) * PetscConj(*B);
78: m = n = k = 1;
79: } else {
80: m = rA; n = cB; k = cA;
81: BLASgemm_(qA, qB, &m, &n, &k, &a, (PetscScalar*)A, &ldA, (PetscScalar*)B,
82: &ldB, &b, C, &ldC);
83: }
85: PetscLogFlops(m*n*2*k);
86: PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);
88: return(0);
89: }
93: /*
94: Compute C <- A*B, where
95: sC, structure of C,
96: ldC, the leading dimension of C,
97: sA, structure of A,
98: ldA, the leading dimension of A,
99: rA, cA, rows and columns of A,
100: At, if true use the transpose of A instead,
101: sB, structure of B,
102: ldB, the leading dimension of B,
103: rB, cB, rows and columns of B,
104: Bt, if true use the transpose of B instead
105: */
106: PetscErrorCode SlepcDenseMatProdTriang(
107: PetscScalar *C, MatType_t sC, PetscInt ldC,
108: const PetscScalar *A, MatType_t sA, PetscInt ldA, PetscInt rA, PetscInt cA,
109: PetscBool At,
110: const PetscScalar *B, MatType_t sB, PetscInt ldB, PetscInt rB, PetscInt cB,
111: PetscBool Bt)
112: {
113: PetscErrorCode ierr;
114: PetscInt tmp;
115: PetscScalar one=1.0, zero=0.0;
116: PetscBLASInt rC, cC, _ldA = ldA, _ldB = ldB, _ldC = ldC;
120: if ((rA == 0) || (cB == 0)) { return(0); }
125: /* Transpose if needed */
126: if (At) tmp = rA, rA = cA, cA = tmp;
127: if (Bt) tmp = rB, rB = cB, cB = tmp;
128:
129: /* Check size */
130: if (cA != rB) SETERRQ(PETSC_COMM_SELF,1, "Matrix dimensions do not match");
131: if (sB != 0) SETERRQ(PETSC_COMM_SELF,1, "Matrix type not supported for B");
133: /* Optimized version: trivial case */
134: if ((rA == 1) && (cA == 1) && (cB == 1)) {
135: if (!At && !Bt) *C = *A * *B;
136: else if (At && !Bt) *C = PetscConj(*A) * *B;
137: else if (!At && Bt) *C = *A * PetscConj(*B);
138: else if (At && Bt) *C = PetscConj(*A) * PetscConj(*B);
139: return(0);
140: }
141:
142: /* Optimized versions: sA == 0 && sB == 0 */
143: if ((sA == 0) && (sB == 0)) {
144: if (At) tmp = rA, rA = cA, cA = tmp;
145: if (Bt) tmp = rB, rB = cB, cB = tmp;
146: SlepcDenseMatProd(C, ldC, 0.0, 1.0, A, ldA, rA, cA, At, B, ldB, rB,
147: cB, Bt);
148: PetscFunctionReturn(ierr);
149: }
151: /* Optimized versions: A hermitian && (B not triang) */
152: if (DVD_IS(sA,DVD_MAT_HERMITIAN) &&
153: DVD_ISNOT(sB,DVD_MAT_UTRIANG) &&
154: DVD_ISNOT(sB,DVD_MAT_LTRIANG) ) {
155: PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);
156: rC = rA; cC = cB;
157: BLASsymm_("L", DVD_ISNOT(sA,DVD_MAT_LTRIANG)?"U":"L", &rC, &cC, &one,
158: (PetscScalar*)A, &_ldA, (PetscScalar*)B, &_ldB, &zero, C, &_ldC);
159: PetscLogFlops(rA*cB*cA);
160: PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);
161: return(0);
162: }
164: /* Optimized versions: B hermitian && (A not triang) */
165: if (DVD_IS(sB,DVD_MAT_HERMITIAN) &&
166: DVD_ISNOT(sA,DVD_MAT_UTRIANG) &&
167: DVD_ISNOT(sA,DVD_MAT_LTRIANG) ) {
168: PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);
169: rC = rA; cC = cB;
170: BLASsymm_("R", DVD_ISNOT(sB,DVD_MAT_LTRIANG)?"U":"L", &rC, &cC, &one,
171: (PetscScalar*)B, &_ldB, (PetscScalar*)A, &_ldA, &zero, C, &_ldC);
172: PetscLogFlops(rA*cB*cA);
173: PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);
174: return(0);
175: }
176:
177: SETERRQ(PETSC_COMM_SELF,1, "Matrix type not supported for A");
178: }
182: /*
183: Normalize the columns of the matrix A, where
184: ldA, the leading dimension of A,
185: rA, cA, rows and columns of A.
186: if eigi is given, the pairs of contiguous columns i i+1 such as eigi[i] != 0
187: are normalized as being one column.
188: */
189: PetscErrorCode SlepcDenseNorm(PetscScalar *A, PetscInt ldA, PetscInt _rA,
190: PetscInt cA, PetscScalar *eigi)
191: {
192: PetscErrorCode ierr;
193: PetscInt i;
194: PetscScalar norm, norm0;
195: PetscBLASInt rA = _rA, one=1;
201: PetscLogEventBegin(SLEPC_SlepcDenseNorm,0,0,0,0);
203: for(i=0; i<cA; i++) {
204: if(eigi && eigi[i] != 0.0) {
205: norm = BLASnrm2_(&rA, &A[i*ldA], &one);
206: norm0 = BLASnrm2_(&rA, &A[(i+1)*ldA], &one);
207: norm = 1.0/PetscSqrtScalar(norm*norm + norm0*norm0);
208: BLASscal_(&rA, &norm, &A[i*ldA], &one);
209: BLASscal_(&rA, &norm, &A[(i+1)*ldA], &one);
210: i++;
211: } else {
212: norm = BLASnrm2_(&rA, &A[i*ldA], &one);
213: norm = 1.0 / norm;
214: BLASscal_(&rA, &norm, &A[i*ldA], &one);
215: }
216: }
218: PetscLogEventEnd(SLEPC_SlepcDenseNorm,0,0,0,0);
220: return(0);
221: }
225: /*
226: Y <- X, where
227: ldX, leading dimension of X,
228: rX, cX, rows and columns of X
229: ldY, leading dimension of Y
230: */
231: PetscErrorCode SlepcDenseCopy(PetscScalar *Y, PetscInt ldY, PetscScalar *X,
232: PetscInt ldX, PetscInt rX, PetscInt cX)
233: {
234: PetscErrorCode ierr;
235: PetscInt i;
241: if ((ldX < rX) || (ldY < rX)) SETERRQ(PETSC_COMM_SELF,1, "Leading dimension error");
242:
243: /* Quick exit */
244: if (Y == X) {
245: if (ldX != ldY) SETERRQ(PETSC_COMM_SELF,1, "Leading dimension error");
246: return(0);
247: }
249: PetscLogEventBegin(SLEPC_SlepcDenseCopy,0,0,0,0);
250: for(i=0; i<cX; i++) {
251: PetscMemcpy(&Y[ldY*i], &X[ldX*i], sizeof(PetscScalar)*rX);
252:
253: }
254: PetscLogEventEnd(SLEPC_SlepcDenseCopy,0,0,0,0);
256: return(0);
257: }
261: /*
262: Y <- X, where
263: ldX, leading dimension of X,
264: rX, cX, rows and columns of X
265: ldY, leading dimension of Y
266: */
267: PetscErrorCode SlepcDenseCopyTriang(PetscScalar *Y, MatType_t sY, PetscInt ldY,
268: PetscScalar *X, MatType_t sX, PetscInt ldX,
269: PetscInt rX, PetscInt cX)
270: {
271: PetscErrorCode ierr;
272: PetscInt i,j,c;
278: if ((ldX < rX) || (ldY < rX)) SETERRQ(PETSC_COMM_SELF,1, "Leading dimension error");
280: if (sY == 0 && sX == 0) {
281: SlepcDenseCopy(Y, ldY, X, ldX, rX, cX);
282: return(0);
283: }
285: if (rX != cX) SETERRQ(PETSC_COMM_SELF,1, "Rectangular matrices not supported");
287: if (DVD_IS(sX,DVD_MAT_UTRIANG) &&
288: DVD_ISNOT(sX,DVD_MAT_LTRIANG)) { /* UpTr to ... */
289: if (DVD_IS(sY,DVD_MAT_UTRIANG) &&
290: DVD_ISNOT(sY,DVD_MAT_LTRIANG)) /* ... UpTr, */
291: c = 0; /* so copy */
292: else if(DVD_ISNOT(sY,DVD_MAT_UTRIANG) &&
293: DVD_IS(sY,DVD_MAT_LTRIANG)) /* ... LoTr, */
294: c = 1; /* so transpose */
295: else /* ... Full, */
296: c = 2; /* so reflect from up */
297: } else if (DVD_ISNOT(sX,DVD_MAT_UTRIANG) &&
298: DVD_IS(sX,DVD_MAT_LTRIANG)) { /* LoTr to ... */
299: if (DVD_IS(sY,DVD_MAT_UTRIANG) &&
300: DVD_ISNOT(sY,DVD_MAT_LTRIANG)) /* ... UpTr, */
301: c = 1; /* so transpose */
302: else if(DVD_ISNOT(sY,DVD_MAT_UTRIANG) &&
303: DVD_IS(sY,DVD_MAT_LTRIANG)) /* ... LoTr, */
304: c = 0; /* so copy */
305: else /* ... Full, */
306: c = 3; /* so reflect fr. down */
307: } else /* Full to any, */
308: c = 0; /* so copy */
309:
310: PetscLogEventBegin(SLEPC_SlepcDenseCopy,0,0,0,0);
312: switch(c) {
313: case 0: /* copy */
314: for(i=0; i<cX; i++) {
315: PetscMemcpy(&Y[ldY*i], &X[ldX*i], sizeof(PetscScalar)*rX);
316:
317: }
318: break;
320: case 1: /* transpose */
321: for(i=0; i<cX; i++)
322: for(j=0; j<rX; j++)
323: Y[ldY*j+i] = PetscConj(X[ldX*i+j]);
324: break;
326: case 2: /* reflection from up */
327: for(i=0; i<cX; i++)
328: for(j=0; j<PetscMin(i+1,rX); j++)
329: Y[ldY*j+i] = PetscConj(Y[ldY*i+j] = X[ldX*i+j]);
330: break;
332: case 3: /* reflection from down */
333: for(i=0; i<cX; i++)
334: for(j=i; j<rX; j++)
335: Y[ldY*j+i] = PetscConj(Y[ldY*i+j] = X[ldX*i+j]);
336: break;
337: }
338: PetscLogEventEnd(SLEPC_SlepcDenseCopy,0,0,0,0);
340: return(0);
341: }
346: /*
347: Compute Y[0..cM-1] <- alpha * X[0..cX-1] * M + beta * Y[0..cM-1],
348: where X and Y are contiguous global vectors.
349: */
350: PetscErrorCode SlepcUpdateVectorsZ(Vec *Y, PetscScalar beta, PetscScalar alpha,
351: Vec *X, PetscInt cX, const PetscScalar *M, PetscInt ldM, PetscInt rM,
352: PetscInt cM)
353: {
354: PetscErrorCode ierr;
358: SlepcUpdateVectorsS(Y, 1, beta, alpha, X, cX, 1, M, ldM, rM, cM);
359:
361: return(0);
362: }
367: /*
368: Compute Y[0:dY:cM*dY-1] <- alpha * X[0:dX:cX-1] * M + beta * Y[0:dY:cM*dY-1],
369: where X and Y are contiguous global vectors.
370: */
371: PetscErrorCode SlepcUpdateVectorsS(Vec *Y, PetscInt dY, PetscScalar beta,
372: PetscScalar alpha, Vec *X, PetscInt cX, PetscInt dX, const PetscScalar *M,
373: PetscInt ldM, PetscInt rM, PetscInt cM)
374: {
375: PetscErrorCode ierr;
376: const PetscScalar *px;
377: PetscScalar *py;
378: PetscInt rX, rY, ldX, ldY, i, rcX;
381: SlepcValidVecsContiguous(Y,cM*dY,1);
382: SlepcValidVecsContiguous(X,cX,5);
385: /* Compute the real number of columns */
386: rcX = cX/dX;
387: if (rcX != rM) SETERRQ(((PetscObject)*Y)->comm,1, "Matrix dimensions do not match");
389: if ((rcX == 0) || (rM == 0) || (cM == 0)) {
390: return(0);
391: } else if ((Y + cM <= X) || (X + cX <= Y) ||
392: ((X != Y) && ((PetscMax(dX,dY))%(PetscMin(dX,dY))!=0))) {
393: /* If Y[0..cM-1] and X[0..cX-1] are not overlapped... */
395: /* Get the dense matrices and dimensions associated to Y and X */
396: VecGetLocalSize(X[0], &rX);
397: VecGetLocalSize(Y[0], &rY);
398: if (rX != rY) SETERRQ(((PetscObject)*Y)->comm,1, "The multivectors do not have the same dimension");
399: VecGetArrayRead(X[0], &px);
400: VecGetArray(Y[0], &py);
402: /* Update the strides */
403: ldX = rX*dX; ldY= rY*dY;
405: /* Do operation */
406: SlepcDenseMatProd(py, ldY, beta, alpha, px, ldX, rX, rcX,
407: PETSC_FALSE, M, ldM, rM, cM, PETSC_FALSE);
408:
409: VecRestoreArrayRead(X[0], &px);
410: VecRestoreArray(Y[0], &py);
411: for(i=1; i<cM; i++) {
412: PetscObjectStateIncrease((PetscObject)Y[dY*i]);
413: }
415: } else if ((Y >= X) && (beta == 0.0) && (dY == dX)) {
416: /* If not, call to SlepcUpdateVectors */
417: SlepcUpdateStrideVectors(cX, X, Y-X, dX, Y-X+cM*dX, M-ldM*(Y-X),
418: ldM, PETSC_FALSE);
419: if (alpha != 1.0)
420: for (i=0; i<cM; i++) {
421: VecScale(Y[i], alpha);
422: }
423: } else SETERRQ(((PetscObject)*Y)->comm,1, "Unsupported case");
424: return(0);
425: }
429: /*
430: Compute X <- alpha * X[0:dX:cX-1] * M
431: where X is a matrix with non-consecutive columns
432: */
433: PetscErrorCode SlepcUpdateVectorsD(Vec *X, PetscInt cX, PetscScalar alpha,
434: const PetscScalar *M, PetscInt ldM, PetscInt rM, PetscInt cM,
435: PetscScalar *work, PetscInt lwork)
436: {
438: PetscScalar **px, *Y, *Z;
439: PetscInt rX, i, j, rY, rY0, ldY;
442: SlepcValidVecsContiguous(X,cX,1);
446: if (cX != rM) SETERRQ(((PetscObject)*X)->comm,1, "Matrix dimensions do not match");
448: rY = (lwork/2)/cX;
449: if (rY <= 0) SETERRQ(((PetscObject)*X)->comm,1, "Insufficient work space given");
450: Y = work; Z = &Y[cX*rY]; ldY = rY;
452: if ((cX == 0) || (rM == 0) || (cM == 0)) {
453: return(0);
454: }
456: /* Get the dense vectors associated to the columns of X */
457: PetscMalloc(sizeof(Vec)*cX, &px);
458: for(i=0; i<cX; i++) {
459: VecGetArray(X[i], &px[i]);
460: }
461: VecGetLocalSize(X[0], &rX);
463: for(i=0, rY0=0; i<rX; i+=rY0) {
464: rY0 = PetscMin(rY, rX-i);
466: /* Y <- X[i0:i1,:] */
467: for(j=0; j<cX; j++) {
468: SlepcDenseCopy(&Y[ldY*j], ldY, px[j]+i, rX, rY0, 1);
469:
470: }
472: /* Z <- Y * M */
473: SlepcDenseMatProd(Z, ldY, 0.0, alpha, Y, ldY, rY0, cX, PETSC_FALSE,
474: M, ldM, rM, cM, PETSC_FALSE);
475:
477: /* X <- Z */
478: for(j=0; j<cM; j++) {
479: SlepcDenseCopy(px[j]+i, rX, &Z[j*ldY], ldY, rY0, 1);
480:
481: }
482: }
484: for(i=0; i<cX; i++) {
485: VecRestoreArray(X[i], &px[i]);
486: }
487: PetscFree(px);
489: return(0);
490: }
496: /* Computes M <- [ M(0:sU-1, 0:sV-1) W(0:sU-1, sV:eV-1) ]
497: [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
498: where W = U' * V.
499: workS0 and workS1 are an auxiliary scalar vector of size
500: (eU-sU)*sV*(sU!=0)+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
501: is needed, and of size eU*eV.
502: */
503: PetscErrorCode VecsMult(PetscScalar *M, MatType_t sM, PetscInt ldM,
504: Vec *U, PetscInt sU, PetscInt eU,
505: Vec *V, PetscInt sV, PetscInt eV,
506: PetscScalar *workS0, PetscScalar *workS1)
507: {
508: PetscErrorCode ierr;
509: PetscInt ldU, ldV, i, j, k, ms = (eU-sU)*sV*(sU==0?0:1)+(eV-sV)*eU;
510: const PetscScalar *pu, *pv;
511: PetscScalar *W, *Wr;
515: /* Check if quick exit */
516: if ((eU-sU == 0) || (eV-sV == 0))
517: return(0);
519: SlepcValidVecsContiguous(U,eU,4);
520: SlepcValidVecsContiguous(V,eV,7);
522:
523: /* Get the dense matrices and dimensions associated to U and V */
524: VecGetLocalSize(U[0], &ldU);
525: VecGetLocalSize(V[0], &ldV);
526: if (ldU != ldV) SETERRQ(((PetscObject)*U)->comm,1, "Matrix dimensions do not match");
527: VecGetArrayRead(U[0], &pu);
528: VecGetArrayRead(V[0], &pv);
530: if (workS0) {
532: W = workS0;
533: } else {
534: PetscMalloc(sizeof(PetscScalar)*ms, &W);
535:
536: }
538: PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);
540: if ((sU == 0) && (sV == 0) && (eU == ldM)) {
541: /* Use the smart memory usage version */
543: /* W <- U' * V */
544: SlepcDenseMatProdTriang(W, sM, eU,
545: pu, 0, ldU, ldU, eU, PETSC_TRUE,
546: pv, 0, ldV, ldV, eV, PETSC_FALSE);
547:
548:
549: /* ReduceAll(W, SUM) */
550: MPI_Allreduce(W, M, eU*eV, MPIU_SCALAR, MPIU_SUM,
551: ((PetscObject)U[0])->comm);
552: /* Full M matrix */
553: } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
554: DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
555: if (workS1) {
557: Wr = workS1;
558: if (PetscAbs(PetscMin(W-workS1, workS1-W)) < ms) SETERRQ(PETSC_COMM_SELF,1, "Consistency broken");
559: } else {
560: PetscMalloc(sizeof(PetscScalar)*ms, &Wr);
561:
562: }
563:
564: /* W(0:(eU-sU)*sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
565: if (sU > 0) {
566: SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
567: pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
568: pv , ldV, ldV, sV, PETSC_FALSE);
569:
570: }
571:
572: /* W((eU-sU)*sV:(eU-sU)*sV+(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
573: SlepcDenseMatProd(W+(eU-sU)*sV*(sU > 0?1:0), eU, 0.0, 1.0,
574: pu, ldU, ldU, eU, PETSC_TRUE,
575: pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
576:
577:
578: /* ReduceAll(W, SUM) */
579: MPI_Allreduce(W, Wr, ms, MPIU_SCALAR,
580: MPIU_SUM, ((PetscObject)U[0])->comm);
581:
582: /* M(...,...) <- W */
583: k = 0;
584: if (sU > 0) for (i=0; i<sV; i++)
585: for (j=ldM*i+sU; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
586: for (i=sV; i<eV; i++)
587: for (j=ldM*i; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
588:
589: if (!workS1) {
590: PetscFree(Wr);
591: }
593: /* Upper triangular M matrix */
594: } else if (DVD_IS(sM,DVD_MAT_UTRIANG) &&
595: DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
596: if (workS1) {
598: Wr = workS1;
599: if (PetscAbs(PetscMin(W-workS1,workS1-W)) < (eV-sV)*eU) SETERRQ(PETSC_COMM_SELF,1, "Consistency broken");
600: } else {
601: PetscMalloc(sizeof(PetscScalar)*(eV-sV)*eU, &Wr);
602:
603: }
604:
605: /* W(0:(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
606: SlepcDenseMatProd(W, eU, 0.0, 1.0,
607: pu, ldU, ldU, eU, PETSC_TRUE,
608: pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
609:
610:
611: /* ReduceAll(W, SUM) */
612: MPI_Allreduce(W, Wr, (eV-sV)*eU, MPIU_SCALAR, MPIU_SUM,
613: ((PetscObject)U[0])->comm);
614:
615: /* M(...,...) <- W */
616: for (i=sV,k=0; i<eV; i++)
617: for (j=ldM*i; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
619: if (!workS1) {
620: PetscFree(Wr);
621: }
623: /* Lower triangular M matrix */
624: } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
625: DVD_IS(sM,DVD_MAT_LTRIANG)) {
626: if (workS1) {
628: Wr = workS1;
629: if (PetscMin(W - workS1, workS1 - W) < (eU-sU)*eV) SETERRQ(PETSC_COMM_SELF,1, "Consistency broken");
630: } else {
631: PetscMalloc(sizeof(PetscScalar)*(eU-sU)*eV, &Wr);
632: }
633:
634: /* W(0:(eU-sU)*eV-1) <- U(sU:eU-1)' * V(0:eV-1) */
635: SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
636: pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
637: pv , ldV, ldV, eV, PETSC_FALSE);
638:
639:
640: /* ReduceAll(W, SUM) */
641: MPI_Allreduce(W, Wr, (eU-sU)*eV, MPIU_SCALAR, MPIU_SUM,
642: ((PetscObject)U[0])->comm);
643:
644: /* M(...,...) <- W */
645: for (i=0,k=0; i<eV; i++)
646: for (j=ldM*i+sU; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
647:
648: if (!workS1) {
649: PetscFree(Wr);
650: }
651: }
653: PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);
655: if (!workS0) {
656: PetscFree(W);
657: }
659: VecRestoreArrayRead(U[0], &pu);
660: VecRestoreArrayRead(V[0], &pv);
661: return(0);
662: }
667: /* Computes M <- [ M(0:sU-1, 0:sV-1) W(0:sU-1, sV:eV-1) ]
668: [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
669: where W = local_U' * local_V. Needs VecsMultIb for completing the operation!
670: workS0 and workS1 are an auxiliary scalar vector of size
671: (eU-sU)*sV+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
672: is needed, and of size eU*eV.
673: */
674: PetscErrorCode VecsMultIa(PetscScalar *M, MatType_t sM, PetscInt ldM,
675: Vec *U, PetscInt sU, PetscInt eU,
676: Vec *V, PetscInt sV, PetscInt eV)
677: {
678: PetscErrorCode ierr;
679: PetscInt ldU, ldV;
680: PetscScalar *pu, *pv;
684: /* Check if quick exit */
685: if ((eU-sU == 0) || (eV-sV == 0))
686: return(0);
687:
688: SlepcValidVecsContiguous(U,eU,4);
689: SlepcValidVecsContiguous(V,eV,7);
692: /* Get the dense matrices and dimensions associated to U and V */
693: VecGetLocalSize(U[0], &ldU);
694: VecGetLocalSize(V[0], &ldV);
695: if (ldU != ldV) SETERRQ(((PetscObject)*U)->comm,1, "Matrix dimensions do not match");
696: VecGetArray(U[0], &pu);
697: VecGetArray(V[0], &pv);
699: if ((sU == 0) && (sV == 0) && (eU == ldM)) {
700: /* M <- local_U' * local_V */
701: SlepcDenseMatProdTriang(M, sM, eU,
702: pu, 0, ldU, ldU, eU, PETSC_TRUE,
703: pv, 0, ldV, ldV, eV, PETSC_FALSE);
704:
705:
706: /* Full M matrix */
707: } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
708: DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
709: /* M(sU:eU-1,0:sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
710: SlepcDenseMatProd(&M[sU], ldM, 0.0, 1.0,
711: pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
712: pv , ldV, ldV, sV, PETSC_FALSE);
713:
714:
715: /* M(0:eU-1,sV:eV-1) <- U(0:eU-1)' * V(sV:eV-1) */
716: SlepcDenseMatProd(&M[ldM*sV], ldM, 0.0, 1.0,
717: pu, ldU, ldU, eU, PETSC_TRUE,
718: pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
719:
720:
721: /* Other structures */
722: } else SETERRQ(((PetscObject)*U)->comm,1, "Matrix structure not supported");
724: VecRestoreArray(U[0], &pu);
725: PetscObjectStateDecrease((PetscObject)U[0]);
726: VecRestoreArray(V[0], &pv);
727: PetscObjectStateDecrease((PetscObject)V[0]);
729: return(0);
730: }
735: /* Computes M <- nprocs*M
736: where nprocs is the number of processors.
737: */
738: PetscErrorCode VecsMultIc(PetscScalar *M, MatType_t sM, PetscInt ldM,
739: PetscInt rM, PetscInt cM, Vec V)
740: {
741: int i,j,n;
745: /* Check if quick exit */
746: if ((rM == 0) || (cM == 0))
747: return(0);
749:
750: if (sM != 0) SETERRQ(((PetscObject)V)->comm,1, "Matrix structure not supported");
752: MPI_Comm_size(((PetscObject)V)->comm, &n);
754: for(i=0; i<cM; i++)
755: for(j=0; j<rM; j++)
756: M[ldM*i+j]/= (PetscScalar)n;
758: return(0);
759: }
764: /* Computes N <- Allreduce( [ M(0:sU-1, 0:sV-1) W(0:sU-1, sV:eV-1) ] )
765: ( [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ] )
766: where W = U' * V.
767: workS0 and workS1 are an auxiliary scalar vector of size
768: (eU-sU)*sV+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
769: is needed, and of size eU*eV.
770: */
771: PetscErrorCode VecsMultIb(PetscScalar *M, MatType_t sM, PetscInt ldM,
772: PetscInt rM, PetscInt cM, PetscScalar *auxS,
773: Vec V)
774: {
775: PetscErrorCode ierr;
776: PetscScalar *W, *Wr;
780: /* Check if quick exit */
781: if ((rM == 0) || (cM == 0))
782: return(0);
784:
785: if (auxS) {
787: W = auxS;
788: } else {
789: PetscMalloc(sizeof(PetscScalar)*rM*cM*2, &W);
790:
791: }
792: Wr = W + rM*cM;
794: PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);
796: if (sM == 0) {
797: /* W <- M */
798: SlepcDenseCopy(W, rM, M, ldM, rM, cM);
800: /* Wr <- ReduceAll(W, SUM) */
801: MPI_Allreduce(W, Wr, rM*cM, MPIU_SCALAR, MPIU_SUM,
802: ((PetscObject)V)->comm);
804: /* M <- Wr */
805: SlepcDenseCopy(M, ldM, Wr, rM, rM, cM);
807: /* Other structures */
808: } else SETERRQ(((PetscObject)V)->comm,1, "Matrix structure not supported");
810: PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);
812: if (!auxS) {
813: PetscFree(W);
814: }
816: return(0);
817: }
822: /* Computes M <- [ M(0:sU-1, 0:sV-1) W(0:sU-1, sV:eV-1) ]
823: [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
824: where W = U' * V.
825: r, a DvdReduction structure,
826: sr, an structure DvdMult_copy_func.
827: */
828: PetscErrorCode VecsMultS(PetscScalar *M, MatType_t sM, PetscInt ldM,
829: Vec *U, PetscInt sU, PetscInt eU,
830: Vec *V, PetscInt sV, PetscInt eV, DvdReduction *r,
831: DvdMult_copy_func *sr)
832: {
833: PetscErrorCode ierr;
834: PetscInt ldU, ldV, ms = (eU-sU)*sV*(sU==0?0:1)+(eV-sV)*eU;
835: const PetscScalar *pu, *pv;
836: PetscScalar *W;
840: /* Check if quick exit */
841: if ((eU-sU == 0) || (eV-sV == 0))
842: return(0);
843:
844: SlepcValidVecsContiguous(U,eU,4);
845: SlepcValidVecsContiguous(V,eV,7);
848: /* Get the dense matrices and dimensions associated to U and V */
849: VecGetLocalSize(U[0], &ldU);
850: VecGetLocalSize(V[0], &ldV);
851: if (ldU != ldV) SETERRQ(((PetscObject)*U)->comm,1, "Matrix dimensions do not match");
852: VecGetArrayRead(U[0], &pu);
853: VecGetArrayRead(V[0], &pv);
855: PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);
857: if ((sU == 0) && (sV == 0)) {
858: /* Use the smart memory usage version */
860: /* Add the reduction to r */
861: SlepcAllReduceSum(r, eU*eV, VecsMultS_copy_func, sr, &W);
862:
864: /* W <- U' * V */
865: SlepcDenseMatProdTriang(W, sM, eU,
866: pu, 0, ldU, ldU, eU, PETSC_TRUE,
867: pv, 0, ldV, ldV, eV, PETSC_FALSE);
868:
869:
870: /* M <- ReduceAll(W, SUM) */
871: sr->M = M; sr->ld = ldM;
872: sr->i0 = 0; sr->i1 = eV; sr->s0 = sU; sr->e0 = eU;
873: sr->i2 = eV;
875: /* Full M matrix */
876: } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
877: DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
878: /* Add the reduction to r */
879: SlepcAllReduceSum(r, ms, VecsMultS_copy_func, sr, &W);
880:
882: /* W(0:(eU-sU)*sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
883: SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
884: pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
885: pv , ldV, ldV, sV, PETSC_FALSE);
886:
887:
888: /* W((eU-sU)*sV:(eU-sU)*sV+(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
889: SlepcDenseMatProd(W+(eU-sU)*sV*(sU > 0?1:0), eU, 0.0, 1.0,
890: pu, ldU, ldU, eU, PETSC_TRUE,
891: pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
892:
893:
894: /* M <- ReduceAll(W, SUM) */
895: sr->M = M; sr->ld = ldM;
896: sr->i0 = sU>0?0:sV; sr->i1 = sV; sr->s0 = sU; sr->e0 = eU;
897: sr->i2 = eV; sr->s1 = 0; sr->e1 = eU;
899: /* Upper triangular M matrix */
900: } else if (DVD_IS(sM,DVD_MAT_UTRIANG) &&
901: DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
902: /* Add the reduction to r */
903: SlepcAllReduceSum(r, (eV-sV)*eU, VecsMultS_copy_func, sr, &W);
904:
905:
906: /* W(0:(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
907: SlepcDenseMatProd(W, eU, 0.0, 1.0,
908: pu, ldU, ldU, eU, PETSC_TRUE,
909: pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
910:
911:
912: /* M <- ReduceAll(W, SUM) */
913: sr->M = M; sr->ld = ldM;
914: sr->i0 = sV; sr->i1 = eV; sr->s0 = 0; sr->e0 = eU;
915: sr->i2 = eV;
916:
917: /* Lower triangular M matrix */
918: } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
919: DVD_IS(sM,DVD_MAT_LTRIANG)) {
920: /* Add the reduction to r */
921: SlepcAllReduceSum(r, (eU-sU)*eV, VecsMultS_copy_func, sr, &W);
922:
923:
924: /* W(0:(eU-sU)*eV-1) <- U(sU:eU-1)' * V(0:eV-1) */
925: SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
926: pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
927: pv , ldV, ldV, eV, PETSC_FALSE);
928:
929:
930: /* ReduceAll(W, SUM) */
931: sr->M = M; sr->ld = ldM;
932: sr->i0 = 0; sr->i1 = eV; sr->s0 = sU; sr->e0 = eU;
933: sr->i2 = eV;
934: }
936: PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);
938: VecRestoreArrayRead(U[0], &pu);
939: VecRestoreArrayRead(V[0], &pv);
940: return(0);
941: }
945: PetscErrorCode VecsMultS_copy_func(PetscScalar *out, PetscInt size_out,
946: void *ptr)
947: {
948: PetscInt i, j, k;
949: DvdMult_copy_func
950: *sr = (DvdMult_copy_func*)ptr;
955: for (i=sr->i0,k=0; i<sr->i1; i++)
956: for (j=sr->ld*i+sr->s0; j<sr->ld*i+sr->e0; j++,k++) sr->M[j] = out[k];
957: for (i=sr->i1; i<sr->i2; i++)
958: for (j=sr->ld*i+sr->s1; j<sr->ld*i+sr->e1; j++,k++) sr->M[j] = out[k];
960: if (k != size_out) SETERRQ(PETSC_COMM_SELF,1, "Wrong size");
962: return(0);
963: }
967: /* Orthonormalize a chunk of parallel vector.
968: NOTE: wS0 and wS1 must be of size n*n.
969: */
970: PetscErrorCode VecsOrthonormalize(Vec *V, PetscInt n, PetscScalar *wS0,
971: PetscScalar *wS1)
972: {
973: #if defined(PETSC_MISSING_LAPACK_PBTRF)
975: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PBTRF - Lapack routine is unavailable");
976: #else
977: PetscErrorCode ierr;
978: PetscBLASInt nn = n, info, ld;
979: PetscInt ldV, i;
980: PetscScalar *H, *T, one=1.0, *pv;
981:
984: if (!wS0) {
985: PetscMalloc(sizeof(PetscScalar)*n*n, &H);
986: } else {
988: H = wS0;
989: }
990: if (!wS1) {
991: PetscMalloc(sizeof(PetscScalar)*n*n, &T);
992: } else {
994: T = wS1;
995: }
997: /* H <- V' * V */
998: VecsMult(H, 0, n, V, 0, n, V, 0, n, T, PETSC_NULL);
1000: /* H <- chol(H) */
1001: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1002: LAPACKpbtrf_("U", &nn, &nn, H, &nn, &info);
1003: PetscFPTrapPop();
1004: if (info) SETERRQ1(((PetscObject)*V)->comm,PETSC_ERR_LIB, "Error in Lapack PBTRF %d", info);
1006: /* V <- V * inv(H) */
1007: VecGetLocalSize(V[0], &ldV);
1008: VecGetArray(V[0], &pv);
1009: ld = ldV;
1010: BLAStrsm_("R", "U", "N", "N", &ld, &nn, &one, H, &nn, pv, &ld);
1011: VecRestoreArray(V[0], &pv);
1012: for(i=1; i<n; i++) {
1013: PetscObjectStateIncrease((PetscObject)V[i]);
1014: }
1016: if (!wS0) {
1017: PetscFree(H);
1018: }
1019: if (!wS1) {
1020: PetscFree(T);
1021: }
1022:
1023: return(0);
1024: #endif
1025: }
1029: /*
1030: Sum up several arrays with only one call to MPIReduce.
1031: */
1032: PetscErrorCode SlepcAllReduceSumBegin(DvdReductionChunk *ops,
1033: PetscInt max_size_ops,
1034: PetscScalar *in, PetscScalar *out,
1035: PetscInt max_size_in, DvdReduction *r,
1036: MPI_Comm comm)
1037: {
1042: r->in = in;
1043: r->out = out;
1044: r->size_in = 0;
1045: r->max_size_in = max_size_in;
1046: r->ops = ops;
1047: r->size_ops = 0;
1048: r->max_size_ops = max_size_ops;
1049: r->comm = comm;
1051: return(0);
1052: }
1056: PetscErrorCode SlepcAllReduceSum(DvdReduction *r, PetscInt size_in,
1057: DvdReductionPostF f, void *ptr,
1058: PetscScalar **in)
1059: {
1061: *in = r->in + r->size_in;
1062: r->ops[r->size_ops].out = r->out + r->size_in;
1063: r->ops[r->size_ops].size_out = size_in;
1064: r->ops[r->size_ops].f = f;
1065: r->ops[r->size_ops].ptr = ptr;
1066: if (++(r->size_ops) > r->max_size_ops) SETERRQ(PETSC_COMM_SELF,1, "max_size_ops is not large enough");
1067: if ((r->size_in+= size_in) > r->max_size_in) SETERRQ(PETSC_COMM_SELF,1, "max_size_in is not large enough");
1068: return(0);
1069: }
1074: PetscErrorCode SlepcAllReduceSumEnd(DvdReduction *r)
1075: {
1076: PetscErrorCode ierr;
1077: PetscInt i;
1081: /* Check if quick exit */
1082: if (r->size_ops == 0)
1083: return(0);
1085: /* Call the MPIAllReduce routine */
1086: MPI_Allreduce(r->in, r->out, r->size_in, MPIU_SCALAR, MPIU_SUM,
1087: r->comm);
1089: /* Call the postponed routines */
1090: for(i=0; i<r->size_ops; i++) {
1091: r->ops[i].f(r->ops[i].out, r->ops[i].size_out, r->ops[i].ptr);
1092:
1093: }
1095: /* Tag the operation as done */
1096: r->size_ops = 0;
1098: return(0);
1099: }
1104: /* auxS: size_cX+V_new_e */
1105: PetscErrorCode dvd_orthV(IP ip, Vec *defl, PetscInt size_DS, Vec *cX,
1106: PetscInt size_cX, Vec *V, PetscInt V_new_s,
1107: PetscInt V_new_e, PetscScalar *auxS,
1108: PetscRandom rand)
1109: {
1110: PetscErrorCode ierr;
1111: PetscInt i, j;
1112: PetscBool lindep;
1113: PetscReal norm;
1114: PetscScalar *auxS0 = auxS;
1115:
1117:
1118: /* Orthonormalize V with IP */
1119: for (i=V_new_s; i<V_new_e; i++) {
1120: for(j=0; j<3; j++) {
1121: if (j>0) { SlepcVecSetRandom(V[i], rand); }
1122: if (cX + size_cX == V) {
1123: /* If cX and V are contiguous, orthogonalize in one step */
1124: IPOrthogonalize(ip, size_DS, defl, size_cX+i, PETSC_NULL, cX,
1125: V[i], auxS0, &norm, &lindep);
1126: } else if (defl) {
1127: /* Else orthogonalize first against defl, and then against cX and V */
1128: IPOrthogonalize(ip, size_DS, defl, size_cX, PETSC_NULL, cX,
1129: V[i], auxS0, PETSC_NULL, &lindep);
1130: if(!lindep) {
1131: IPOrthogonalize(ip, 0, PETSC_NULL, i, PETSC_NULL, V,
1132: V[i], auxS0, &norm, &lindep);
1133: }
1134: } else {
1135: /* Else orthogonalize first against cX and then against V */
1136: IPOrthogonalize(ip, size_cX, cX, i, PETSC_NULL, V,
1137: V[i], auxS0, &norm, &lindep);
1138: }
1139: if(!lindep && (norm > PETSC_SQRT_MACHINE_EPSILON)) break;
1140: PetscInfo1(ip,"Orthonormalization problems adding the vector %D to the searching subspace\n",i);
1141: }
1142: if(lindep || (norm < PETSC_SQRT_MACHINE_EPSILON)) SETERRQ(((PetscObject)ip)->comm,1, "Error during orthonormalization of eigenvectors");
1143: VecScale(V[i], 1.0/norm);
1144: }
1145:
1146: return(0);
1147: }
1152: /* auxS: size_cX+V_new_e+1 */
1153: PetscErrorCode dvd_BorthV_faster(IP ip, Vec *defl, Vec *BDS,PetscReal *BDSn, PetscInt size_DS, Vec *cX,
1154: Vec *BcX, PetscReal *BcXn, PetscInt size_cX, Vec *V, Vec *BV,PetscReal *BVn,
1155: PetscInt V_new_s, PetscInt V_new_e,
1156: PetscScalar *auxS, PetscRandom rand)
1157: {
1158: PetscErrorCode ierr;
1159: PetscInt i, j;
1160: PetscBool lindep;
1161: PetscReal norm;
1162: PetscScalar *auxS0 = auxS;
1163:
1165:
1166: /* Orthonormalize V with IP */
1167: for (i=V_new_s; i<V_new_e; i++) {
1168: for(j=0; j<3; j++) {
1169: if (j>0) { SlepcVecSetRandom(V[i], rand); }
1170: if (cX + size_cX == V && BcX + size_cX == BV) {
1171: /* If cX and V are contiguous, orthogonalize in one step */
1172: IPBOrthogonalize(ip, size_DS, defl, BDS, BDSn, size_cX+i, PETSC_NULL, cX, BcX, BcXn,
1173: V[i], BV[i], auxS0, &norm, &lindep);
1174: } else if (defl) {
1175: /* Else orthogonalize first against defl, and then against cX and V */
1176: IPBOrthogonalize(ip, size_DS, defl, BDS, BDSn, size_cX, PETSC_NULL, cX, BcX, BcXn,
1177: V[i], BV[i], auxS0, PETSC_NULL, &lindep);
1178: if(!lindep) {
1179: IPBOrthogonalize(ip, 0, PETSC_NULL, PETSC_NULL, PETSC_NULL, i, PETSC_NULL, V, BV, BVn,
1180: V[i], BV[i], auxS0, &norm, &lindep);
1181: }
1182: } else {
1183: /* Else orthogonalize first against cX and then against V */
1184: IPBOrthogonalize(ip, size_cX, cX, BcX, BcXn, i, PETSC_NULL, V, BV, BVn,
1185: V[i], BV[i], auxS0, &norm, &lindep);
1186: }
1187: if(!lindep && (PetscAbs(norm) > PETSC_SQRT_MACHINE_EPSILON)) break;
1188: PetscInfo1(ip, "Orthonormalization problems adding the vector %d to the searching subspace\n", i);
1189:
1190: }
1191: if(lindep || (PetscAbs(norm) < PETSC_SQRT_MACHINE_EPSILON)) {
1192: SETERRQ(((PetscObject)ip)->comm,1, "Error during the orthonormalization of the eigenvectors");
1193: }
1194: if (BVn) BVn[i] = norm > 0.0 ? 1.0 : -1.0;
1195: norm = PetscAbs(norm);
1196: VecScale(V[i], 1.0/norm);
1197: VecScale(BV[i], 1.0/norm);
1198: }
1199:
1200: return(0);
1201: }
1205: /* auxS: size_cX+V_new_e+1 */
1206: PetscErrorCode dvd_BorthV_stable(IP ip,Vec *defl,PetscReal *BDSn,PetscInt size_DS,Vec *cX,PetscReal *BcXn,PetscInt size_cX,Vec *V,PetscReal *BVn,PetscInt V_new_s,PetscInt V_new_e,PetscScalar *auxS,PetscRandom rand)
1207: {
1208: PetscErrorCode ierr;
1209: PetscInt i, j;
1210: PetscBool lindep;
1211: PetscReal norm;
1212: PetscScalar *auxS0 = auxS;
1213:
1215:
1216: /* Orthonormalize V with IP */
1217: for (i=V_new_s; i<V_new_e; i++) {
1218: for(j=0; j<3; j++) {
1219: if (j>0) {
1220: SlepcVecSetRandom(V[i],rand);
1221: PetscInfo1(ip,"Orthonormalization problems adding the vector %d to the searching subspace\n",i);
1222: }
1223: /* Orthogonalize against the deflation, if needed */
1224: if (defl) {
1225: IPPseudoOrthogonalize(ip,size_DS,defl,BDSn,V[i],auxS0,PETSC_NULL,&lindep);
1226: if (lindep) continue;
1227: }
1228: /* If cX and V are contiguous, orthogonalize in one step */
1229: if (cX + size_cX == V) {
1230: IPPseudoOrthogonalize(ip,size_cX+i,cX,BcXn,V[i],auxS0,&norm,&lindep);
1231: /* Else orthogonalize first against cX and then against V */
1232: } else {
1233: IPPseudoOrthogonalize(ip,size_cX,cX,BcXn,V[i],auxS0,PETSC_NULL,&lindep);
1234: if (lindep) continue;
1235: IPPseudoOrthogonalize(ip,i,V,BVn,V[i],auxS0,&norm,&lindep);
1236: }
1237: if(!lindep && (PetscAbs(norm) > PETSC_MACHINE_EPSILON)) break;
1238: }
1239: if(lindep || (PetscAbs(norm) < PETSC_MACHINE_EPSILON)) {
1240: SETERRQ(((PetscObject)ip)->comm,1, "Error during the orthonormalization of the eigenvectors");
1241: }
1242: if (BVn) BVn[i] = norm > 0.0 ? 1.0 : -1.0;
1243: norm = PetscAbs(norm);
1244: VecScale(V[i], 1.0/norm);
1245: }
1246:
1247: return(0);
1248: }