GCC Code Coverage Report


Directory: ./
File: src/sys/classes/bv/impls/svec/sveccuda/sveccuda.cu
Date: 2026-01-12 03:57:26
Exec Total Coverage
Lines: 322 334 96.4%
Functions: 21 21 100.0%
Branches: 209 414 50.5%

Line Branch Exec Source
1 3573 /*
2 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3 SLEPc - Scalable Library for Eigenvalue Problem Computations
4 Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
5
6 This file is part of SLEPc.
7 SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9 */
10 /*
11 BV implemented as a single Vec (CUDA version)
12 */
13
14 #include <slepc/private/bvimpl.h>
15 #include <slepccupmblas.h>
16 #include "../src/sys/classes/bv/impls/svec/svec.h"
17
18 162 PetscErrorCode BVMult_Svec_CUDA(BV Y,PetscScalar alpha,PetscScalar beta,BV X,Mat Q)
19 {
20 162 BV_SVEC *y = (BV_SVEC*)Y->data,*x = (BV_SVEC*)X->data;
21 162 const PetscScalar *d_px,*d_A,*d_B,*d_q;
22 162 PetscScalar *d_py,*d_C;
23 162 PetscInt ldq;
24
25 162 PetscFunctionBegin;
26
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
162 if (!Y->n) PetscFunctionReturn(PETSC_SUCCESS);
27
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
162 PetscCall(VecCUDAGetArrayRead(x->v,&d_px));
28
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
162 if (beta==(PetscScalar)0.0) PetscCall(VecCUDAGetArrayWrite(y->v,&d_py));
29
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
162 else PetscCall(VecCUDAGetArray(y->v,&d_py));
30 162 d_A = d_px+(X->nc+X->l)*X->ld;
31 162 d_C = d_py+(Y->nc+Y->l)*Y->ld;
32
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
162 if (Q) {
33
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
130 PetscCall(MatDenseGetLDA(Q,&ldq));
34
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
130 PetscCall(BV_MatDenseCUDAGetArrayRead(Y,Q,&d_q));
35 130 d_B = d_q+Y->l*ldq+X->l;
36
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
130 PetscCall(BVMult_BLAS_CUDA(Y,Y->n,Y->k-Y->l,X->k-X->l,alpha,d_A,X->ld,d_B,ldq,beta,d_C,Y->ld));
37
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
130 PetscCall(BV_MatDenseCUDARestoreArrayRead(Y,Q,&d_q));
38
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
32 } else PetscCall(BVAXPY_BLAS_CUDA(Y,Y->n,Y->k-Y->l,alpha,d_A,X->ld,beta,d_C,Y->ld));
39
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
162 PetscCall(VecCUDARestoreArrayRead(x->v,&d_px));
40
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
162 if (beta==(PetscScalar)0.0) PetscCall(VecCUDARestoreArrayWrite(y->v,&d_py));
41
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
162 else PetscCall(VecCUDARestoreArray(y->v,&d_py));
42 PetscFunctionReturn(PETSC_SUCCESS);
43 }
44
45 466 PetscErrorCode BVMultVec_Svec_CUDA(BV X,PetscScalar alpha,PetscScalar beta,Vec y,PetscScalar *q)
46 {
47 466 BV_SVEC *x = (BV_SVEC*)X->data;
48 466 PetscScalar *d_py,*d_q;
49 466 const PetscScalar *d_px;
50
51 466 PetscFunctionBegin;
52
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
466 PetscCall(VecCUDAGetArrayRead(x->v,&d_px));
53
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
466 if (beta==(PetscScalar)0.0) PetscCall(VecCUDAGetArrayWrite(y,&d_py));
54
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
466 else PetscCall(VecCUDAGetArray(y,&d_py));
55
3/4
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
466 if (!q) PetscCall(VecCUDAGetArray(X->buffer,&d_q));
56 else {
57 72 PetscInt k=X->k-X->l;
58
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
72 PetscCallCUDA(cudaMalloc((void**)&d_q,k*sizeof(PetscScalar)));
59
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
72 PetscCallCUDA(cudaMemcpy(d_q,q,k*sizeof(PetscScalar),cudaMemcpyHostToDevice));
60 72 PetscCall(PetscLogCpuToGpu(k*sizeof(PetscScalar)));
61 }
62
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
466 PetscCall(BVMultVec_BLAS_CUDA(X,X->n,X->k-X->l,alpha,d_px+(X->nc+X->l)*X->ld,X->ld,d_q,beta,d_py));
63
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
466 PetscCall(VecCUDARestoreArrayRead(x->v,&d_px));
64
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
466 if (beta==(PetscScalar)0.0) PetscCall(VecCUDARestoreArrayWrite(y,&d_py));
65
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
466 else PetscCall(VecCUDARestoreArray(y,&d_py));
66
3/4
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
466 if (!q) PetscCall(VecCUDARestoreArray(X->buffer,&d_q));
67
1/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
72 else PetscCallCUDA(cudaFree(d_q));
68 PetscFunctionReturn(PETSC_SUCCESS);
69 }
70
71 94 PetscErrorCode BVMultInPlace_Svec_CUDA(BV V,Mat Q,PetscInt s,PetscInt e)
72 {
73 94 BV_SVEC *ctx = (BV_SVEC*)V->data;
74 94 PetscScalar *d_pv;
75 94 const PetscScalar *d_q;
76 94 PetscInt ldq;
77
78 94 PetscFunctionBegin;
79
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
94 if (s>=e || !V->n) PetscFunctionReturn(PETSC_SUCCESS);
80
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
94 PetscCall(MatDenseGetLDA(Q,&ldq));
81
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
94 PetscCall(VecCUDAGetArray(ctx->v,&d_pv));
82
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
94 PetscCall(BV_MatDenseCUDAGetArrayRead(V,Q,&d_q));
83
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
94 PetscCall(BVMultInPlace_BLAS_CUDA(V,V->n,V->k-V->l,s-V->l,e-V->l,d_pv+(V->nc+V->l)*V->ld,V->ld,d_q+V->l*ldq+V->l,ldq,PETSC_FALSE));
84
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
94 PetscCall(BV_MatDenseCUDARestoreArrayRead(V,Q,&d_q));
85
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
94 PetscCall(VecCUDARestoreArray(ctx->v,&d_pv));
86 PetscFunctionReturn(PETSC_SUCCESS);
87 }
88
89 2 PetscErrorCode BVMultInPlaceHermitianTranspose_Svec_CUDA(BV V,Mat Q,PetscInt s,PetscInt e)
90 {
91 2 BV_SVEC *ctx = (BV_SVEC*)V->data;
92 2 PetscScalar *d_pv;
93 2 const PetscScalar *d_q;
94 2 PetscInt ldq;
95
96 2 PetscFunctionBegin;
97
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
2 if (s>=e || !V->n) PetscFunctionReturn(PETSC_SUCCESS);
98
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
2 PetscCall(MatDenseGetLDA(Q,&ldq));
99
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
2 PetscCall(VecCUDAGetArray(ctx->v,&d_pv));
100
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
2 PetscCall(BV_MatDenseCUDAGetArrayRead(V,Q,&d_q));
101
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
2 PetscCall(BVMultInPlace_BLAS_CUDA(V,V->n,V->k-V->l,s-V->l,e-V->l,d_pv+(V->nc+V->l)*V->ld,V->ld,d_q+V->l*ldq+V->l,ldq,PETSC_TRUE));
102
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
2 PetscCall(BV_MatDenseCUDARestoreArrayRead(V,Q,&d_q));
103
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
2 PetscCall(VecCUDARestoreArray(ctx->v,&d_pv));
104 PetscFunctionReturn(PETSC_SUCCESS);
105 }
106
107 350 PetscErrorCode BVDot_Svec_CUDA(BV X,BV Y,Mat M)
108 {
109 350 BV_SVEC *x = (BV_SVEC*)X->data,*y = (BV_SVEC*)Y->data;
110 350 const PetscScalar *d_px,*d_py;
111 350 PetscScalar *pm;
112 350 PetscInt ldm;
113
114 350 PetscFunctionBegin;
115
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
350 PetscCall(MatDenseGetLDA(M,&ldm));
116
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
350 PetscCall(VecCUDAGetArrayRead(x->v,&d_px));
117
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
350 PetscCall(VecCUDAGetArrayRead(y->v,&d_py));
118
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
350 PetscCall(MatDenseGetArrayWrite(M,&pm));
119
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
350 PetscCall(BVDot_BLAS_CUDA(X,Y->k-Y->l,X->k-X->l,X->n,d_py+(Y->nc+Y->l)*Y->ld,Y->ld,d_px+(X->nc+X->l)*X->ld,X->ld,pm+X->l*ldm+Y->l,ldm,x->mpi));
120
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
350 PetscCall(MatDenseRestoreArrayWrite(M,&pm));
121
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
350 PetscCall(VecCUDARestoreArrayRead(x->v,&d_px));
122
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
350 PetscCall(VecCUDARestoreArrayRead(y->v,&d_py));
123 PetscFunctionReturn(PETSC_SUCCESS);
124 }
125
126 476 PetscErrorCode BVDotVec_Svec_CUDA(BV X,Vec y,PetscScalar *q)
127 {
128 476 BV_SVEC *x = (BV_SVEC*)X->data;
129 476 const PetscScalar *d_px,*d_py;
130 476 Vec z = y;
131
132 476 PetscFunctionBegin;
133
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
476 if (PetscUnlikely(X->matrix)) {
134
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
94 PetscCall(BV_IPMatMult(X,y));
135 94 z = X->Bx;
136 }
137
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
476 PetscCall(VecCUDAGetArrayRead(x->v,&d_px));
138
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
476 PetscCall(VecCUDAGetArrayRead(z,&d_py));
139
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
476 PetscCall(BVDotVec_BLAS_CUDA(X,X->n,X->k-X->l,d_px+(X->nc+X->l)*X->ld,X->ld,d_py,q,x->mpi));
140
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
476 PetscCall(VecCUDARestoreArrayRead(z,&d_py));
141
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
476 PetscCall(VecCUDARestoreArrayRead(x->v,&d_px));
142 PetscFunctionReturn(PETSC_SUCCESS);
143 }
144
145 8 PetscErrorCode BVDotVec_Local_Svec_CUDA(BV X,Vec y,PetscScalar *m)
146 {
147 8 BV_SVEC *x = (BV_SVEC*)X->data;
148 8 const PetscScalar *d_px,*d_py;
149 8 Vec z = y;
150
151 8 PetscFunctionBegin;
152
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
8 if (PetscUnlikely(X->matrix)) {
153 PetscCall(BV_IPMatMult(X,y));
154 z = X->Bx;
155 }
156
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
8 PetscCall(VecCUDAGetArrayRead(x->v,&d_px));
157
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
8 PetscCall(VecCUDAGetArrayRead(z,&d_py));
158
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
8 PetscCall(BVDotVec_BLAS_CUDA(X,X->n,X->k-X->l,d_px+(X->nc+X->l)*X->ld,X->ld,d_py,m,PETSC_FALSE));
159
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
8 PetscCall(VecCUDARestoreArrayRead(z,&d_py));
160
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
8 PetscCall(VecCUDARestoreArrayRead(x->v,&d_px));
161 PetscFunctionReturn(PETSC_SUCCESS);
162 }
163
164 440 PetscErrorCode BVScale_Svec_CUDA(BV bv,PetscInt j,PetscScalar alpha)
165 {
166 440 BV_SVEC *ctx = (BV_SVEC*)bv->data;
167 440 PetscScalar *d_array,*d_A;
168 440 PetscInt n=0;
169
170 440 PetscFunctionBegin;
171
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
440 if (!bv->n) PetscFunctionReturn(PETSC_SUCCESS);
172
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
440 PetscCall(VecCUDAGetArray(ctx->v,&d_array));
173
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
440 if (PetscUnlikely(j<0)) {
174 12 d_A = d_array+(bv->nc+bv->l)*bv->ld;
175 12 n = (bv->k-bv->l)*bv->ld;
176 } else {
177 428 d_A = d_array+(bv->nc+j)*bv->ld;
178 428 n = bv->n;
179 }
180
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
440 PetscCall(BVScale_BLAS_CUDA(bv,n,d_A,alpha));
181
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
440 PetscCall(VecCUDARestoreArray(ctx->v,&d_array));
182 PetscFunctionReturn(PETSC_SUCCESS);
183 }
184
185 187 PetscErrorCode BVNorm_Svec_CUDA(BV bv,PetscInt j,NormType type,PetscReal *val)
186 {
187 187 BV_SVEC *ctx = (BV_SVEC*)bv->data;
188 187 const PetscScalar *array,*d_array,*d_A;
189 187 PetscInt n=0;
190
191 187 PetscFunctionBegin;
192
6/8
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 2 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
187 if (!ctx->mpi && ((j<0 && type==NORM_FROBENIUS && bv->ld==bv->n) || (j>=0 && type==NORM_2))) {
193 /* compute on GPU with cuBLAS - TODO: include the MPI case here */
194 55 *val = 0.0;
195
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
55 if (!bv->n) PetscFunctionReturn(PETSC_SUCCESS);
196
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
55 PetscCall(VecCUDAGetArrayRead(ctx->v,&d_array));
197
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
55 if (PetscUnlikely(j<0)) {
198 24 d_A = d_array+(bv->nc+bv->l)*bv->ld;
199 24 n = (bv->k-bv->l)*bv->ld;
200 } else {
201 31 d_A = d_array+(bv->nc+j)*bv->ld;
202 31 n = bv->n;
203 }
204
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
55 PetscCall(BVNorm_BLAS_CUDA(bv,n,d_A,val));
205
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
55 PetscCall(VecCUDARestoreArrayRead(ctx->v,&d_array));
206 } else {
207 /* compute on CPU */
208
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
132 PetscCall(VecGetArrayRead(ctx->v,&array));
209
3/4
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
132 if (PetscUnlikely(j<0)) PetscCall(BVNorm_LAPACK_Private(bv,bv->n,bv->k-bv->l,array+(bv->nc+bv->l)*bv->ld,bv->ld,type,val,ctx->mpi));
210
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
40 else PetscCall(BVNorm_LAPACK_Private(bv,bv->n,1,array+(bv->nc+j)*bv->ld,bv->ld,type,val,ctx->mpi));
211
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
132 PetscCall(VecRestoreArrayRead(ctx->v,&array));
212 }
213 PetscFunctionReturn(PETSC_SUCCESS);
214 }
215
216 4 PetscErrorCode BVNorm_Local_Svec_CUDA(BV bv,PetscInt j,NormType type,PetscReal *val)
217 {
218 4 BV_SVEC *ctx = (BV_SVEC*)bv->data;
219 4 const PetscScalar *array,*d_array,*d_A;
220 4 PetscInt n=0;
221
222 4 PetscFunctionBegin;
223
2/6
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
4 if ((j<0 && type==NORM_FROBENIUS && bv->ld==bv->n) || (j>=0 && type==NORM_2)) {
224 /* compute on GPU with cuBLAS */
225 4 *val = 0.0;
226
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
4 if (!bv->n) PetscFunctionReturn(PETSC_SUCCESS);
227
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
4 PetscCall(VecCUDAGetArrayRead(ctx->v,&d_array));
228
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
4 if (PetscUnlikely(j<0)) {
229 d_A = d_array+(bv->nc+bv->l)*bv->ld;
230 n = (bv->k-bv->l)*bv->ld;
231 } else {
232 4 d_A = d_array+(bv->nc+j)*bv->ld;
233 4 n = bv->n;
234 }
235
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
4 PetscCall(BVNorm_BLAS_CUDA(bv,n,d_A,val));
236
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
4 PetscCall(VecCUDARestoreArrayRead(ctx->v,&d_array));
237 } else {
238 /* compute on CPU */
239 PetscCall(VecGetArrayRead(ctx->v,&array));
240 if (PetscUnlikely(j<0)) PetscCall(BVNorm_LAPACK_Private(bv,bv->n,bv->k-bv->l,array+(bv->nc+bv->l)*bv->ld,bv->ld,type,val,PETSC_FALSE));
241 else PetscCall(BVNorm_LAPACK_Private(bv,bv->n,1,array+(bv->nc+j)*bv->ld,bv->ld,type,val,PETSC_FALSE));
242 PetscCall(VecRestoreArrayRead(ctx->v,&array));
243 }
244 PetscFunctionReturn(PETSC_SUCCESS);
245 }
246
247 9 PetscErrorCode BVNormalize_Svec_CUDA(BV bv,PetscScalar *eigi)
248 {
249 9 BV_SVEC *ctx = (BV_SVEC*)bv->data;
250 9 PetscScalar *array,*d_array,*wi=NULL;
251
252 9 PetscFunctionBegin;
253
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 2 times.
9 if (eigi) wi = eigi+bv->l;
254
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
9 if (!ctx->mpi) {
255 /* compute on GPU with cuBLAS - TODO: include the MPI case here */
256
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
3 if (!bv->n) PetscFunctionReturn(PETSC_SUCCESS);
257
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
3 PetscCall(VecCUDAGetArray(ctx->v,&d_array));
258
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
3 PetscCall(BVNormalize_BLAS_CUDA(bv,bv->n,bv->k-bv->l,d_array+(bv->nc+bv->l)*bv->ld,bv->ld,wi));
259
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
3 PetscCall(VecCUDARestoreArray(ctx->v,&d_array));
260 } else {
261 /* compute on CPU */
262
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
6 PetscCall(VecGetArray(ctx->v,&array));
263
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
6 PetscCall(BVNormalize_LAPACK_Private(bv,bv->n,bv->k-bv->l,array+(bv->nc+bv->l)*bv->ld,bv->ld,wi,ctx->mpi));
264
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
6 PetscCall(VecRestoreArray(ctx->v,&array));
265 }
266 PetscFunctionReturn(PETSC_SUCCESS);
267 }
268
269 168 PetscErrorCode BVMatMult_Svec_CUDA(BV V,Mat A,BV W)
270 {
271 168 BV_SVEC *v = (BV_SVEC*)V->data,*w = (BV_SVEC*)W->data;
272 168 Mat Vmat,Wmat;
273 168 const PetscScalar *d_pv;
274 168 PetscScalar *d_pw;
275 168 PetscInt j;
276
277 168 PetscFunctionBegin;
278
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
168 if (V->vmm) {
279
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
162 PetscCall(BVGetMat(V,&Vmat));
280
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
162 PetscCall(BVGetMat(W,&Wmat));
281
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
162 PetscCall(MatProductCreateWithMat(A,Vmat,NULL,Wmat));
282
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
162 PetscCall(MatProductSetType(Wmat,MATPRODUCT_AB));
283
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
162 PetscCall(MatProductSetFromOptions(Wmat));
284
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
162 PetscCall(MatProductSymbolic(Wmat));
285
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
162 PetscCall(MatProductNumeric(Wmat));
286
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
162 PetscCall(MatProductClear(Wmat));
287
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
162 PetscCall(BVRestoreMat(V,&Vmat));
288
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
162 PetscCall(BVRestoreMat(W,&Wmat));
289 } else {
290
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
6 PetscCall(VecCUDAGetArrayRead(v->v,&d_pv));
291
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
6 PetscCall(VecCUDAGetArrayWrite(w->v,&d_pw));
292
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
52 for (j=0;j<V->k-V->l;j++) {
293
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
46 PetscCall(VecCUDAPlaceArray(V->cv[1],(PetscScalar *)d_pv+(V->nc+V->l+j)*V->ld));
294
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
46 PetscCall(VecCUDAPlaceArray(W->cv[1],d_pw+(W->nc+W->l+j)*W->ld));
295
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
46 PetscCall(MatMult(A,V->cv[1],W->cv[1]));
296
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
46 PetscCall(VecCUDAResetArray(V->cv[1]));
297
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
46 PetscCall(VecCUDAResetArray(W->cv[1]));
298 }
299
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
6 PetscCall(VecCUDARestoreArrayRead(v->v,&d_pv));
300
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
6 PetscCall(VecCUDARestoreArrayWrite(w->v,&d_pw));
301 }
302 PetscFunctionReturn(PETSC_SUCCESS);
303 }
304
305 154 PetscErrorCode BVCopy_Svec_CUDA(BV V,BV W)
306 {
307 154 BV_SVEC *v = (BV_SVEC*)V->data,*w = (BV_SVEC*)W->data;
308 154 const PetscScalar *d_pv;
309 154 PetscScalar *d_pw;
310
311 154 PetscFunctionBegin;
312
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
154 PetscCall(VecCUDAGetArrayRead(v->v,&d_pv));
313
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
154 PetscCall(VecCUDAGetArray(w->v,&d_pw));
314
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
154 PetscCallCUDA(cudaMemcpy2D(d_pw+(W->nc+W->l)*W->ld,W->ld*sizeof(PetscScalar),d_pv+(V->nc+V->l)*V->ld,V->ld*sizeof(PetscScalar),V->n*sizeof(PetscScalar),V->k-V->l,cudaMemcpyDeviceToDevice));
315
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
154 PetscCall(VecCUDARestoreArrayRead(v->v,&d_pv));
316
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
154 PetscCall(VecCUDARestoreArray(w->v,&d_pw));
317 PetscFunctionReturn(PETSC_SUCCESS);
318 }
319
320 4 PetscErrorCode BVCopyColumn_Svec_CUDA(BV V,PetscInt j,PetscInt i)
321 {
322 4 BV_SVEC *v = (BV_SVEC*)V->data;
323 4 PetscScalar *d_pv;
324
325 4 PetscFunctionBegin;
326
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
4 PetscCall(VecCUDAGetArray(v->v,&d_pv));
327
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
4 PetscCallCUDA(cudaMemcpy(d_pv+(V->nc+i)*V->ld,d_pv+(V->nc+j)*V->ld,V->n*sizeof(PetscScalar),cudaMemcpyDeviceToDevice));
328
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
4 PetscCall(VecCUDARestoreArray(v->v,&d_pv));
329 PetscFunctionReturn(PETSC_SUCCESS);
330 }
331
332 14 PetscErrorCode BVResize_Svec_CUDA(BV bv,PetscInt m,PetscBool copy)
333 {
334 14 BV_SVEC *ctx = (BV_SVEC*)bv->data;
335 14 const PetscScalar *d_pv;
336 14 PetscScalar *d_pnew;
337 14 PetscInt bs;
338 14 Vec vnew;
339 14 char str[50];
340
341 14 PetscFunctionBegin;
342
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
14 PetscCall(PetscLayoutGetBlockSize(bv->map,&bs));
343
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
14 PetscCall(VecCreate(PetscObjectComm((PetscObject)bv),&vnew));
344
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
14 PetscCall(VecSetType(vnew,bv->vtype));
345
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
14 PetscCall(VecSetSizes(vnew,m*bv->ld,PETSC_DECIDE));
346
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
14 PetscCall(VecSetBlockSize(vnew,bs));
347
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
14 if (((PetscObject)bv)->name) {
348
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
14 PetscCall(PetscSNPrintf(str,sizeof(str),"%s_0",((PetscObject)bv)->name));
349
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
14 PetscCall(PetscObjectSetName((PetscObject)vnew,str));
350 }
351
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
14 if (copy) {
352
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
4 PetscCall(VecCUDAGetArrayRead(ctx->v,&d_pv));
353
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
4 PetscCall(VecCUDAGetArrayWrite(vnew,&d_pnew));
354
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
4 PetscCallCUDA(cudaMemcpy(d_pnew,d_pv,PetscMin(m,bv->m)*bv->ld*sizeof(PetscScalar),cudaMemcpyDeviceToDevice));
355
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
4 PetscCall(VecCUDARestoreArrayRead(ctx->v,&d_pv));
356
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
4 PetscCall(VecCUDARestoreArrayWrite(vnew,&d_pnew));
357 }
358
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
14 PetscCall(VecDestroy(&ctx->v));
359 14 ctx->v = vnew;
360 14 PetscFunctionReturn(PETSC_SUCCESS);
361 }
362
363 3467 PetscErrorCode BVGetColumn_Svec_CUDA(BV bv,PetscInt j,Vec*)
364 {
365 3467 BV_SVEC *ctx = (BV_SVEC*)bv->data;
366 3467 PetscScalar *d_pv;
367 3467 PetscInt l;
368
369 3467 PetscFunctionBegin;
370
3/4
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
3467 l = BVAvailableVec;
371
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
3467 PetscCall(VecCUDAGetArray(ctx->v,&d_pv));
372
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
3467 PetscCall(VecCUDAPlaceArray(bv->cv[l],d_pv+(bv->nc+j)*bv->ld));
373 PetscFunctionReturn(PETSC_SUCCESS);
374 }
375
376 3467 PetscErrorCode BVRestoreColumn_Svec_CUDA(BV bv,PetscInt j,Vec*)
377 {
378 3467 BV_SVEC *ctx = (BV_SVEC*)bv->data;
379 3467 PetscInt l;
380
381 3467 PetscFunctionBegin;
382 3467 l = (j==bv->ci[0])? 0: 1;
383
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
3467 PetscCall(VecCUDAResetArray(bv->cv[l]));
384
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
3467 PetscCall(VecCUDARestoreArray(ctx->v,NULL));
385 PetscFunctionReturn(PETSC_SUCCESS);
386 }
387
388 72 PetscErrorCode BVRestoreSplit_Svec_CUDA(BV bv,BV *L,BV *R)
389 {
390 72 Vec v;
391 72 const PetscScalar *d_pv;
392 72 PetscObjectState lstate,rstate;
393 72 PetscBool change=PETSC_FALSE;
394
395 72 PetscFunctionBegin;
396 /* force sync flag to PETSC_CUDA_BOTH */
397
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
72 if (L) {
398
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
64 PetscCall(PetscObjectStateGet((PetscObject)*L,&lstate));
399
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
64 if (lstate != bv->lstate) {
400 8 v = ((BV_SVEC*)bv->L->data)->v;
401
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
8 PetscCall(VecCUDAGetArrayRead(v,&d_pv));
402
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
8 PetscCall(VecCUDARestoreArrayRead(v,&d_pv));
403 change = PETSC_TRUE;
404 }
405 }
406
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
72 if (R) {
407
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 PetscCall(PetscObjectStateGet((PetscObject)*R,&rstate));
408
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
16 if (rstate != bv->rstate) {
409 8 v = ((BV_SVEC*)bv->R->data)->v;
410
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
8 PetscCall(VecCUDAGetArrayRead(v,&d_pv));
411
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
8 PetscCall(VecCUDARestoreArrayRead(v,&d_pv));
412 change = PETSC_TRUE;
413 }
414 }
415
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
64 if (change) {
416 16 v = ((BV_SVEC*)bv->data)->v;
417
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 PetscCall(VecCUDAGetArray(v,(PetscScalar **)&d_pv));
418
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
16 PetscCall(VecCUDARestoreArray(v,(PetscScalar **)&d_pv));
419 }
420 PetscFunctionReturn(PETSC_SUCCESS);
421 }
422
423 12 PetscErrorCode BVRestoreSplitRows_Svec_CUDA(BV bv,IS,IS,BV *U,BV *L)
424 {
425 12 Vec v;
426 12 const PetscScalar *d_pv;
427 12 PetscObjectState lstate,rstate;
428 12 PetscBool change=PETSC_FALSE;
429
430 12 PetscFunctionBegin;
431 /* force sync flag to PETSC_CUDA_BOTH */
432
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
12 if (U) {
433
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
12 PetscCall(PetscObjectStateGet((PetscObject)*U,&rstate));
434
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
12 if (rstate != bv->rstate) {
435 v = ((BV_SVEC*)bv->R->data)->v;
436 PetscCall(VecCUDAGetArrayRead(v,&d_pv));
437 PetscCall(VecCUDARestoreArrayRead(v,&d_pv));
438 change = PETSC_TRUE;
439 }
440 }
441
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
12 if (L) {
442
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
12 PetscCall(PetscObjectStateGet((PetscObject)*L,&lstate));
443
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
12 if (lstate != bv->lstate) {
444 12 v = ((BV_SVEC*)bv->L->data)->v;
445
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
12 PetscCall(VecCUDAGetArrayRead(v,&d_pv));
446
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
12 PetscCall(VecCUDARestoreArrayRead(v,&d_pv));
447 change = PETSC_TRUE;
448 }
449 }
450 if (change) {
451 12 v = ((BV_SVEC*)bv->data)->v;
452
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
12 PetscCall(VecCUDAGetArray(v,(PetscScalar **)&d_pv));
453
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
12 PetscCall(VecCUDARestoreArray(v,(PetscScalar **)&d_pv));
454 }
455 PetscFunctionReturn(PETSC_SUCCESS);
456 }
457
458 330 PetscErrorCode BVGetMat_Svec_CUDA(BV bv,Mat *A)
459 {
460 330 BV_SVEC *ctx = (BV_SVEC*)bv->data;
461 330 PetscScalar *vv,*aa;
462 330 PetscBool create=PETSC_FALSE;
463 330 PetscInt m,cols;
464
465 330 PetscFunctionBegin;
466 330 m = bv->k-bv->l;
467
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
330 if (!bv->Aget) create=PETSC_TRUE;
468 else {
469
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
202 PetscCall(MatDenseCUDAGetArray(bv->Aget,&aa));
470
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
202 PetscCheck(!aa,PetscObjectComm((PetscObject)bv),PETSC_ERR_ARG_WRONGSTATE,"BVGetMat already called on this BV");
471
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
202 PetscCall(MatGetSize(bv->Aget,NULL,&cols));
472
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
202 if (cols!=m) {
473
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
98 PetscCall(MatDestroy(&bv->Aget));
474 create=PETSC_TRUE;
475 }
476 }
477
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
330 PetscCall(VecCUDAGetArray(ctx->v,&vv));
478
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
330 if (create) {
479
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
226 PetscCall(MatCreateDenseFromVecType(PetscObjectComm((PetscObject)bv),bv->vtype,bv->n,PETSC_DECIDE,bv->N,m,bv->ld,vv,&bv->Aget)); /* pass a pointer to avoid allocation of storage */
480
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
226 PetscCall(MatDenseCUDAReplaceArray(bv->Aget,NULL)); /* replace with a null pointer, the value after BVRestoreMat */
481 }
482
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
330 PetscCall(MatDenseCUDAPlaceArray(bv->Aget,vv+(bv->nc+bv->l)*bv->ld)); /* set the actual pointer */
483 330 *A = bv->Aget;
484 330 PetscFunctionReturn(PETSC_SUCCESS);
485 }
486
487 330 PetscErrorCode BVRestoreMat_Svec_CUDA(BV bv,Mat *A)
488 {
489 330 BV_SVEC *ctx = (BV_SVEC*)bv->data;
490 330 PetscScalar *vv,*aa;
491
492 330 PetscFunctionBegin;
493
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
330 PetscCall(MatDenseCUDAGetArray(bv->Aget,&aa));
494 330 vv = aa-(bv->nc+bv->l)*bv->ld;
495
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
330 PetscCall(MatDenseCUDAResetArray(bv->Aget));
496
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
330 PetscCall(VecCUDARestoreArray(ctx->v,&vv));
497 330 *A = NULL;
498 330 PetscFunctionReturn(PETSC_SUCCESS);
499 }
500