Actual source code: test11.c
slepc-3.22.1 2024-10-28
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
11: static char help[] = "Test BV block orthogonalization.\n\n";
13: #include <slepcbv.h>
15: /*
16: Compute the Frobenius norm ||A(l:k,l:k)-diag||_F
17: */
18: PetscErrorCode MyMatNorm(Mat A,PetscInt lda,PetscInt l,PetscInt k,PetscScalar diag,PetscReal *norm)
19: {
20: PetscInt i,j;
21: const PetscScalar *pA;
22: PetscReal s,val;
24: PetscFunctionBeginUser;
25: PetscCall(MatDenseGetArrayRead(A,&pA));
26: s = 0.0;
27: for (i=l;i<k;i++) {
28: for (j=l;j<k;j++) {
29: val = (i==j)? PetscAbsScalar(pA[i+j*lda]-diag): PetscAbsScalar(pA[i+j*lda]);
30: s += val*val;
31: }
32: }
33: *norm = PetscSqrtReal(s);
34: PetscCall(MatDenseRestoreArrayRead(A,&pA));
35: PetscFunctionReturn(PETSC_SUCCESS);
36: }
38: int main(int argc,char **argv)
39: {
40: BV X,Y,Z,cached;
41: Mat B=NULL,M,R=NULL;
42: Vec v,t;
43: PetscInt i,j,n=20,l=2,k=8,Istart,Iend;
44: PetscViewer view;
45: PetscBool withb,resid,rand,verbose;
46: PetscReal norm;
47: PetscScalar alpha;
49: PetscFunctionBeginUser;
50: PetscCall(SlepcInitialize(&argc,&argv,NULL,help));
51: PetscCall(PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL));
52: PetscCall(PetscOptionsGetInt(NULL,NULL,"-l",&l,NULL));
53: PetscCall(PetscOptionsGetInt(NULL,NULL,"-k",&k,NULL));
54: PetscCall(PetscOptionsHasName(NULL,NULL,"-withb",&withb));
55: PetscCall(PetscOptionsHasName(NULL,NULL,"-resid",&resid));
56: PetscCall(PetscOptionsHasName(NULL,NULL,"-rand",&rand));
57: PetscCall(PetscOptionsHasName(NULL,NULL,"-verbose",&verbose));
58: PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Test BV block orthogonalization (length %" PetscInt_FMT ", l=%" PetscInt_FMT ", k=%" PetscInt_FMT ")%s.\n",n,l,k,withb?" with non-standard inner product":""));
60: /* Create template vector */
61: PetscCall(VecCreate(PETSC_COMM_WORLD,&t));
62: PetscCall(VecSetSizes(t,PETSC_DECIDE,n));
63: PetscCall(VecSetFromOptions(t));
65: /* Create BV object X */
66: PetscCall(BVCreate(PETSC_COMM_WORLD,&X));
67: PetscCall(PetscObjectSetName((PetscObject)X,"X"));
68: PetscCall(BVSetSizesFromVec(X,t,k));
69: PetscCall(BVSetFromOptions(X));
71: /* Set up viewer */
72: PetscCall(PetscViewerASCIIGetStdout(PETSC_COMM_WORLD,&view));
73: if (verbose) PetscCall(PetscViewerPushFormat(view,PETSC_VIEWER_ASCII_MATLAB));
75: /* Fill X entries */
76: if (rand) PetscCall(BVSetRandom(X));
77: else {
78: for (j=0;j<k;j++) {
79: PetscCall(BVGetColumn(X,j,&v));
80: PetscCall(VecSet(v,0.0));
81: for (i=0;i<=n/2;i++) {
82: if (i+j<n) {
83: alpha = (3.0*i+j-2)/(2*(i+j+1));
84: PetscCall(VecSetValue(v,i+j,alpha,INSERT_VALUES));
85: }
86: }
87: PetscCall(VecAssemblyBegin(v));
88: PetscCall(VecAssemblyEnd(v));
89: PetscCall(BVRestoreColumn(X,j,&v));
90: }
91: }
92: if (verbose) PetscCall(BVView(X,view));
94: if (withb) {
95: /* Create inner product matrix */
96: PetscCall(MatCreate(PETSC_COMM_WORLD,&B));
97: PetscCall(MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,n,n));
98: PetscCall(MatSetFromOptions(B));
99: PetscCall(PetscObjectSetName((PetscObject)B,"B"));
101: PetscCall(MatGetOwnershipRange(B,&Istart,&Iend));
102: for (i=Istart;i<Iend;i++) {
103: if (i>0) PetscCall(MatSetValue(B,i,i-1,-1.0,INSERT_VALUES));
104: if (i<n-1) PetscCall(MatSetValue(B,i,i+1,-1.0,INSERT_VALUES));
105: PetscCall(MatSetValue(B,i,i,2.0,INSERT_VALUES));
106: }
107: PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
108: PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
109: if (verbose) PetscCall(MatView(B,view));
110: PetscCall(BVSetMatrix(X,B,PETSC_FALSE));
111: }
113: /* Create copy on Y */
114: PetscCall(BVDuplicate(X,&Y));
115: PetscCall(PetscObjectSetName((PetscObject)Y,"Y"));
116: PetscCall(BVCopy(X,Y));
117: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,k,k,NULL,&M));
119: if (resid) {
120: /* Create matrix R to store triangular factor */
121: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,k,k,NULL,&R));
122: PetscCall(PetscObjectSetName((PetscObject)R,"R"));
123: }
125: if (l>0) {
126: /* First orthogonalize leading columns */
127: PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Orthogonalizing leading columns\n"));
128: PetscCall(BVSetActiveColumns(Y,0,l));
129: PetscCall(BVSetActiveColumns(X,0,l));
130: PetscCall(BVOrthogonalize(Y,R));
131: if (verbose) {
132: PetscCall(BVView(Y,view));
133: if (resid) PetscCall(MatView(R,view));
134: }
136: if (withb) {
137: /* Extract cached BV and check it is equal to B*X */
138: PetscCall(BVGetCachedBV(Y,&cached));
139: PetscCall(BVDuplicate(X,&Z));
140: PetscCall(BVSetMatrix(Z,NULL,PETSC_FALSE));
141: PetscCall(BVSetActiveColumns(Z,0,l));
142: PetscCall(BVCopy(X,Z));
143: PetscCall(BVMatMult(X,B,Z));
144: PetscCall(BVMult(Z,-1.0,1.0,cached,NULL));
145: PetscCall(BVNorm(Z,NORM_FROBENIUS,&norm));
146: if (norm<100*PETSC_MACHINE_EPSILON) PetscCall(PetscPrintf(PETSC_COMM_WORLD," Difference ||cached-BX|| < 100*eps\n"));
147: else PetscCall(PetscPrintf(PETSC_COMM_WORLD," Difference ||cached-BX||: %g\n",(double)norm));
148: PetscCall(BVDestroy(&Z));
149: }
151: /* Check orthogonality */
152: PetscCall(BVDot(Y,Y,M));
153: PetscCall(MyMatNorm(M,k,0,l,1.0,&norm));
154: if (norm<100*PETSC_MACHINE_EPSILON) PetscCall(PetscPrintf(PETSC_COMM_WORLD," Level of orthogonality of Q1 < 100*eps\n"));
155: else PetscCall(PetscPrintf(PETSC_COMM_WORLD," Level of orthogonality of Q1: %g\n",(double)norm));
157: if (resid) {
158: /* Check residual */
159: PetscCall(BVDuplicate(X,&Z));
160: PetscCall(BVSetMatrix(Z,NULL,PETSC_FALSE));
161: PetscCall(BVSetActiveColumns(Z,0,l));
162: PetscCall(BVCopy(X,Z));
163: PetscCall(BVMult(Z,-1.0,1.0,Y,R));
164: PetscCall(BVNorm(Z,NORM_FROBENIUS,&norm));
165: if (norm<100*PETSC_MACHINE_EPSILON) PetscCall(PetscPrintf(PETSC_COMM_WORLD," Residual ||X1-Q1*R11|| < 100*eps\n"));
166: else PetscCall(PetscPrintf(PETSC_COMM_WORLD," Residual ||X1-Q1*R11||: %g\n",(double)norm));
167: PetscCall(BVDestroy(&Z));
168: }
170: }
172: /* Now orthogonalize the rest of columns */
173: PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Orthogonalizing active columns\n"));
174: PetscCall(BVSetActiveColumns(Y,l,k));
175: PetscCall(BVSetActiveColumns(X,l,k));
176: PetscCall(BVOrthogonalize(Y,R));
177: if (verbose) {
178: PetscCall(BVView(Y,view));
179: if (resid) PetscCall(MatView(R,view));
180: }
182: if (l>0) {
183: /* Check orthogonality */
184: PetscCall(BVDot(Y,Y,M));
185: PetscCall(MyMatNorm(M,k,l,k,1.0,&norm));
186: if (norm<100*PETSC_MACHINE_EPSILON) PetscCall(PetscPrintf(PETSC_COMM_WORLD," Level of orthogonality of Q2 < 100*eps\n"));
187: else PetscCall(PetscPrintf(PETSC_COMM_WORLD," Level of orthogonality of Q2: %g\n",(double)norm));
188: }
190: /* Check the complete decomposition */
191: PetscCall(PetscPrintf(PETSC_COMM_WORLD,"Overall decomposition\n"));
192: PetscCall(BVSetActiveColumns(Y,0,k));
193: PetscCall(BVSetActiveColumns(X,0,k));
195: /* Check orthogonality */
196: PetscCall(BVDot(Y,Y,M));
197: PetscCall(MyMatNorm(M,k,0,k,1.0,&norm));
198: if (norm<100*PETSC_MACHINE_EPSILON) PetscCall(PetscPrintf(PETSC_COMM_WORLD," Level of orthogonality of Q < 100*eps\n"));
199: else PetscCall(PetscPrintf(PETSC_COMM_WORLD," Level of orthogonality of Q: %g\n",(double)norm));
201: if (resid) {
202: /* Check residual */
203: PetscCall(BVMult(X,-1.0,1.0,Y,R));
204: PetscCall(BVSetMatrix(X,NULL,PETSC_FALSE));
205: PetscCall(BVNorm(X,NORM_FROBENIUS,&norm));
206: if (norm<100*PETSC_MACHINE_EPSILON) PetscCall(PetscPrintf(PETSC_COMM_WORLD," Residual ||X-Q*R|| < 100*eps\n"));
207: else PetscCall(PetscPrintf(PETSC_COMM_WORLD," Residual ||X-Q*R||: %g\n",(double)norm));
208: PetscCall(MatDestroy(&R));
209: }
211: if (B) PetscCall(MatDestroy(&B));
212: PetscCall(MatDestroy(&M));
213: PetscCall(BVDestroy(&X));
214: PetscCall(BVDestroy(&Y));
215: PetscCall(VecDestroy(&t));
216: PetscCall(SlepcFinalize());
217: return 0;
218: }
220: /*TEST
222: testset:
223: args: -bv_orthog_block {{gs chol tsqr tsqrchol svqb}}
224: nsize: 2
225: output_file: output/test11_1.out
226: test:
227: suffix: 1
228: args: -bv_type {{vecs contiguous svec mat}shared output}
229: test:
230: suffix: 1_cuda
231: args: -bv_type {{svec mat}} -vec_type cuda
232: requires: cuda
233: test:
234: suffix: 1_hip
235: args: -bv_type {{svec mat}} -vec_type hip
236: requires: hip
238: testset:
239: args: -withb -bv_orthog_block {{gs chol svqb}}
240: nsize: 2
241: output_file: output/test11_4.out
242: test:
243: suffix: 4
244: args: -bv_type {{vecs contiguous svec mat}shared output}
245: test:
246: suffix: 4_cuda
247: args: -bv_type {{svec mat}} -vec_type cuda -mat_type aijcusparse
248: requires: cuda
249: test:
250: suffix: 4_hip
251: args: -bv_type {{svec mat}} -vec_type hip -mat_type aijhipsparse
252: requires: hip
254: testset:
255: args: -resid -bv_orthog_block {{gs chol tsqr tsqrchol svqb}}
256: nsize: 2
257: output_file: output/test11_6.out
258: test:
259: suffix: 6
260: args: -bv_type {{vecs contiguous svec mat}shared output}
261: test:
262: suffix: 6_cuda
263: args: -bv_type {{svec mat}} -vec_type cuda
264: requires: cuda
265: test:
266: suffix: 6_hip
267: args: -bv_type {{svec mat}} -vec_type hip
268: requires: hip
270: testset:
271: args: -resid -withb -bv_orthog_block {{gs chol svqb}}
272: nsize: 2
273: output_file: output/test11_9.out
274: test:
275: suffix: 9
276: args: -bv_type {{vecs contiguous svec mat}shared output}
277: test:
278: suffix: 9_cuda
279: args: -bv_type {{svec mat}} -vec_type cuda -mat_type aijcusparse
280: requires: cuda
281: test:
282: suffix: 9_hip
283: args: -bv_type {{svec mat}} -vec_type hip -mat_type aijhipsparse
284: requires: hip
286: testset:
287: args: -bv_orthog_block tsqr
288: nsize: 7
289: output_file: output/test11_1.out
290: test:
291: suffix: 11
292: args: -bv_type {{vecs contiguous svec mat}shared output}
293: requires: !defined(PETSCTEST_VALGRIND)
294: test:
295: suffix: 11_cuda
296: TODO: too many processes accessing the GPU
297: args: -bv_type {{svec mat}} -vec_type cuda
298: requires: cuda !defined(PETSCTEST_VALGRIND)
299: test:
300: suffix: 11_hip
301: TODO: too many processes accessing the GPU
302: args: -bv_type {{svec mat}} -vec_type hip
303: requires: hip !defined(PETSCTEST_VALGRIND)
305: testset:
306: args: -resid -n 180 -l 0 -k 7 -bv_orthog_block tsqr
307: nsize: 7
308: output_file: output/test11_12.out
309: test:
310: suffix: 12
311: args: -bv_type {{vecs contiguous svec mat}shared output}
312: requires: double !defined(PETSCTEST_VALGRIND)
313: test:
314: suffix: 12_cuda
315: TODO: too many processes accessing the GPU
316: args: -bv_type {{svec mat}} -vec_type cuda
317: requires: cuda !single !defined(PETSCTEST_VALGRIND)
318: test:
319: suffix: 12_hip
320: TODO: too many processes accessing the GPU
321: args: -bv_type {{svec mat}} -vec_type hip
322: requires: hip !single !defined(PETSCTEST_VALGRIND)
324: TEST*/