GCC Code Coverage Report


Directory: ./
File: src/sys/classes/bv/impls/mat/mathip/mathip.hip.cpp
Date: 2026-01-12 03:57:26
Exec Total Coverage
Lines: 113 354 31.9%
Functions: 9 20 45.0%
Branches: 136 670 20.3%

Line Branch Exec Source
1 /*
2 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3 SLEPc - Scalable Library for Eigenvalue Problem Computations
4 Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
5
6 This file is part of SLEPc.
7 SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9 */
10 /*
11 BV implemented with a dense Mat (HIP version)
12 */
13
14 #include <slepc/private/bvimpl.h>
15 #include <slepccupmblas.h>
16 #include "../src/sys/classes/bv/impls/mat/bvmat.h"
17
18 PetscErrorCode BVMult_Mat_HIP(BV Y,PetscScalar alpha,PetscScalar beta,BV X,Mat Q)
19 {
20 BV_MAT *y = (BV_MAT*)Y->data,*x = (BV_MAT*)X->data;
21 const PetscScalar *d_px,*d_A,*d_B,*d_q;
22 PetscScalar *d_py,*d_C;
23 PetscInt ldq;
24
25 PetscFunctionBegin;
26 if (!Y->n) PetscFunctionReturn(PETSC_SUCCESS);
27 PetscCall(MatDenseHIPGetArrayRead(x->A,&d_px));
28 if (beta==(PetscScalar)0.0) PetscCall(MatDenseHIPGetArrayWrite(y->A,&d_py));
29 else PetscCall(MatDenseHIPGetArray(y->A,&d_py));
30 d_A = d_px+(X->nc+X->l)*X->ld;
31 d_C = d_py+(Y->nc+Y->l)*Y->ld;
32 if (Q) {
33 PetscCall(MatDenseGetLDA(Q,&ldq));
34 PetscCall(BV_MatDenseHIPGetArrayRead(Y,Q,&d_q));
35 d_B = d_q+Y->l*ldq+X->l;
36 PetscCall(BVMult_BLAS_HIP(Y,Y->n,Y->k-Y->l,X->k-X->l,alpha,d_A,X->ld,d_B,ldq,beta,d_C,Y->ld));
37 PetscCall(BV_MatDenseHIPRestoreArrayRead(Y,Q,&d_q));
38 } else PetscCall(BVAXPY_BLAS_HIP(Y,Y->n,Y->k-Y->l,alpha,d_A,X->ld,beta,d_C,Y->ld));
39 PetscCall(MatDenseHIPRestoreArrayRead(x->A,&d_px));
40 if (beta==(PetscScalar)0.0) PetscCall(MatDenseHIPRestoreArrayWrite(y->A,&d_py));
41 else PetscCall(MatDenseHIPRestoreArray(y->A,&d_py));
42 PetscFunctionReturn(PETSC_SUCCESS);
43 }
44
45 7852076831199974864 PetscErrorCode BVMultVec_Mat_HIP(BV X,PetscScalar alpha,PetscScalar beta,Vec y,PetscScalar *q)
46 {
47 7852076831199974864 BV_MAT *x = (BV_MAT*)X->data;
48 7852076831199974864 PetscScalar *d_py,*d_q;
49 7852076831199974864 const PetscScalar *d_px;
50
51 PetscFunctionBegin;
52
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 2 times.
7852076831199974864 PetscCall(MatDenseHIPGetArrayRead(x->A,&d_px));
53
5/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✓ Branch 5 taken 2 times.
9856169365676763336 if (beta==(PetscScalar)0.0) PetscCall(VecHIPGetArrayWrite(y,&d_py));
54
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
17444697806471157380 else PetscCall(VecHIPGetArray(y,&d_py));
55
5/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✓ Branch 5 taken 2 times.
28190245547768033208 if (!q) PetscCall(VecHIPGetArray(X->buffer,&d_q));
56 else {
57 10745547698036824372 PetscInt k=X->k-X->l;
58
3/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 2 times.
10745547698036824372 PetscCallHIP(hipMalloc((void**)&d_q,k*sizeof(PetscScalar)));
59
3/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 1 times.
✓ Branch 7 taken 2 times.
9592621018531233972 PetscCallHIP(hipMemcpy(d_q,q,k*sizeof(PetscScalar),hipMemcpyHostToDevice));
60
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
9592621018531233972 PetscCall(PetscLogCpuToGpu(k*sizeof(PetscScalar)));
61
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
8349355308748921068 }
62
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
22840061652960817104 PetscCall(BVMultVec_BLAS_HIP(X,X->n,X->k-X->l,alpha,d_px+(X->nc+X->l)*X->ld,X->ld,d_q,beta,d_py));
63
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 2 times.
7852076831199974864 PetscCall(MatDenseHIPRestoreArrayRead(x->A,&d_px));
64
5/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✓ Branch 5 taken 2 times.
9856169365676763336 if (beta==(PetscScalar)0.0) PetscCall(VecHIPRestoreArrayWrite(y,&d_py));
65
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
17444697806471157380 else PetscCall(VecHIPRestoreArray(y,&d_py));
66
5/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✓ Branch 5 taken 2 times.
28190245547768033208 if (!q) PetscCall(VecHIPRestoreArray(X->buffer,&d_q));
67
3/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 2 times.
10745547698036824372 else PetscCallHIP(hipFree(d_q));
68 22840061652960817104 PetscFunctionReturn(PETSC_SUCCESS);
69 31045524549837904552 }
70
71 5125096685185269760 PetscErrorCode BVMultInPlace_Mat_HIP(BV V,Mat Q,PetscInt s,PetscInt e)
72 {
73 5125096685185269760 BV_MAT *ctx = (BV_MAT*)V->data;
74 5125096685185269760 PetscScalar *d_pv;
75 5125096685185269760 const PetscScalar *d_q;
76 5125096685185269760 PetscInt ldq;
77
78 PetscFunctionBegin;
79
4/4
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
5125096685185269760 if (s>=e || !V->n) PetscFunctionReturn(PETSC_SUCCESS);
80
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
9736782703612657664 PetscCall(MatDenseGetLDA(Q,&ldq));
81
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
9736782703612657664 PetscCall(MatDenseHIPGetArray(ctx->A,&d_pv));
82
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
9736782703612657664 PetscCall(BV_MatDenseHIPGetArrayRead(V,Q,&d_q));
83
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
9736782703612657664 PetscCall(BVMultInPlace_BLAS_HIP(V,V->n,V->k-V->l,s-V->l,e-V->l,d_pv+(V->nc+V->l)*V->ld,V->ld,d_q+V->l*ldq+V->l,ldq,PETSC_FALSE));
84
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
9736782703612657664 PetscCall(BV_MatDenseHIPRestoreArrayRead(V,Q,&d_q));
85
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
9736782703612657664 PetscCall(MatDenseHIPRestoreArray(ctx->A,&d_pv));
86 9736782703612657664 PetscFunctionReturn(PETSC_SUCCESS);
87 5125096685185269760 }
88
89 PetscErrorCode BVMultInPlaceHermitianTranspose_Mat_HIP(BV V,Mat Q,PetscInt s,PetscInt e)
90 {
91 BV_MAT *ctx = (BV_MAT*)V->data;
92 PetscScalar *d_pv;
93 const PetscScalar *d_q;
94 PetscInt ldq;
95
96 PetscFunctionBegin;
97 if (s>=e || !V->n) PetscFunctionReturn(PETSC_SUCCESS);
98 PetscCall(MatDenseGetLDA(Q,&ldq));
99 PetscCall(MatDenseHIPGetArray(ctx->A,&d_pv));
100 PetscCall(BV_MatDenseHIPGetArrayRead(V,Q,&d_q));
101 PetscCall(BVMultInPlace_BLAS_HIP(V,V->n,V->k-V->l,s-V->l,e-V->l,d_pv+(V->nc+V->l)*V->ld,V->ld,d_q+V->l*ldq+V->l,ldq,PETSC_TRUE));
102 PetscCall(BV_MatDenseHIPRestoreArrayRead(V,Q,&d_q));
103 PetscCall(MatDenseHIPRestoreArray(ctx->A,&d_pv));
104 PetscFunctionReturn(PETSC_SUCCESS);
105 }
106
107 PetscErrorCode BVDot_Mat_HIP(BV X,BV Y,Mat M)
108 {
109 BV_MAT *x = (BV_MAT*)X->data,*y = (BV_MAT*)Y->data;
110 const PetscScalar *d_px,*d_py;
111 PetscScalar *pm;
112 PetscInt ldm;
113
114 PetscFunctionBegin;
115 PetscCall(MatDenseGetLDA(M,&ldm));
116 PetscCall(MatDenseHIPGetArrayRead(x->A,&d_px));
117 PetscCall(MatDenseHIPGetArrayRead(y->A,&d_py));
118 PetscCall(MatDenseGetArrayWrite(M,&pm));
119 PetscCall(BVDot_BLAS_HIP(X,Y->k-Y->l,X->k-X->l,X->n,d_py+(Y->nc+Y->l)*Y->ld,Y->ld,d_px+(X->nc+X->l)*X->ld,X->ld,pm+X->l*ldm+Y->l,ldm,x->mpi));
120 PetscCall(MatDenseRestoreArrayWrite(M,&pm));
121 PetscCall(MatDenseHIPRestoreArrayRead(x->A,&d_px));
122 PetscCall(MatDenseHIPRestoreArrayRead(y->A,&d_py));
123 PetscFunctionReturn(PETSC_SUCCESS);
124 }
125
126 17444697849731208836 PetscErrorCode BVDotVec_Mat_HIP(BV X,Vec y,PetscScalar *q)
127 {
128 17444697849731208836 BV_MAT *x = (BV_MAT*)X->data;
129 17444697849731208836 const PetscScalar *d_px,*d_py;
130 17444697849731208836 Vec z = y;
131
132 PetscFunctionBegin;
133
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
17444697849731208836 if (PetscUnlikely(X->matrix)) {
134 PetscCall(BV_IPMatMult(X,y));
135 z = X->Bx;
136 }
137
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
17444697849731208836 PetscCall(MatDenseHIPGetArrayRead(x->A,&d_px));
138
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
17444697849731208836 PetscCall(VecHIPGetArrayRead(z,&d_py));
139
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
17444697849731208836 PetscCall(BVDotVec_BLAS_HIP(X,X->n,X->k-X->l,d_px+(X->nc+X->l)*X->ld,X->ld,d_py,q,x->mpi));
140
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
17444697849731208836 PetscCall(VecHIPRestoreArrayRead(z,&d_py));
141
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
17444697849731208836 PetscCall(MatDenseHIPRestoreArrayRead(x->A,&d_px));
142 17444697849731208836 PetscFunctionReturn(PETSC_SUCCESS);
143 24362257926764751236 }
144
145 PetscErrorCode BVDotVec_Local_Mat_HIP(BV X,Vec y,PetscScalar *m)
146 {
147 BV_MAT *x = (BV_MAT*)X->data;
148 const PetscScalar *d_px,*d_py;
149 Vec z = y;
150
151 PetscFunctionBegin;
152 if (PetscUnlikely(X->matrix)) {
153 PetscCall(BV_IPMatMult(X,y));
154 z = X->Bx;
155 }
156 PetscCall(MatDenseHIPGetArrayRead(x->A,&d_px));
157 PetscCall(VecHIPGetArrayRead(z,&d_py));
158 PetscCall(BVDotVec_BLAS_HIP(X,X->n,X->k-X->l,d_px+(X->nc+X->l)*X->ld,X->ld,d_py,m,PETSC_FALSE));
159 PetscCall(VecHIPRestoreArrayRead(z,&d_py));
160 PetscCall(MatDenseHIPRestoreArrayRead(x->A,&d_px));
161 PetscFunctionReturn(PETSC_SUCCESS);
162 }
163
164 10813140313896392140 PetscErrorCode BVScale_Mat_HIP(BV bv,PetscInt j,PetscScalar alpha)
165 {
166 10813140313896392140 BV_MAT *ctx = (BV_MAT*)bv->data;
167 10813140313896392140 PetscScalar *d_array,*d_A;
168 10813140313896392140 PetscInt n=0;
169
170 PetscFunctionBegin;
171
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
10813140313896392140 if (!bv->n) PetscFunctionReturn(PETSC_SUCCESS);
172
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
10813140313896392140 PetscCall(MatDenseHIPGetArray(ctx->A,&d_array));
173
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
10813140313896392140 if (PetscUnlikely(j<0)) {
174 d_A = d_array+(bv->nc+bv->l)*bv->ld;
175 n = (bv->k-bv->l)*bv->ld;
176 } else {
177 10813140313896392140 d_A = d_array+(bv->nc+j)*bv->ld;
178 10813140313896392140 n = bv->n;
179 }
180
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
20315742963167664308 PetscCall(BVScale_BLAS_HIP(bv,n,d_A,alpha));
181
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
10813140313896392140 PetscCall(MatDenseHIPRestoreArray(ctx->A,&d_array));
182 10813140313896392140 PetscFunctionReturn(PETSC_SUCCESS);
183 19757281738334671588 }
184
185 204 PetscErrorCode BVNorm_Mat_HIP(BV bv,PetscInt j,NormType type,PetscReal *val)
186 {
187 204 BV_MAT *ctx = (BV_MAT*)bv->data;
188 204 const PetscScalar *array,*d_array,*d_A;
189 204 PetscInt n=0;
190
191 PetscFunctionBegin;
192
4/10
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
204 if (!ctx->mpi && ((j<0 && type==NORM_FROBENIUS && bv->ld==bv->n) || (j>=0 && type==NORM_2))) {
193 /* compute on GPU with hipBLAS - TODO: include the MPI case here */
194 204 *val = 0.0;
195
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
204 if (!bv->n) PetscFunctionReturn(PETSC_SUCCESS);
196
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
204 PetscCall(MatDenseHIPGetArrayRead(ctx->A,&d_array));
197
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
204 if (PetscUnlikely(j<0)) {
198 d_A = d_array+(bv->nc+bv->l)*bv->ld;
199 n = (bv->k-bv->l)*bv->ld;
200 } else {
201 204 d_A = d_array+(bv->nc+j)*bv->ld;
202 204 n = bv->n;
203 }
204
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
204 PetscCall(BVNorm_BLAS_HIP(bv,n,d_A,val));
205
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
204 PetscCall(MatDenseHIPRestoreArrayRead(ctx->A,&d_array));
206 204 } else {
207 /* compute on CPU */
208 PetscCall(MatDenseGetArrayRead(ctx->A,&array));
209 if (PetscUnlikely(j<0)) PetscCall(BVNorm_LAPACK_Private(bv,bv->n,bv->k-bv->l,array+(bv->nc+bv->l)*bv->ld,bv->ld,type,val,ctx->mpi));
210 else PetscCall(BVNorm_LAPACK_Private(bv,bv->n,1,array+(bv->nc+j)*bv->ld,bv->ld,type,val,ctx->mpi));
211 PetscCall(MatDenseRestoreArrayRead(ctx->A,&array));
212 }
213 204 PetscFunctionReturn(PETSC_SUCCESS);
214 204 }
215
216 PetscErrorCode BVNorm_Local_Mat_HIP(BV bv,PetscInt j,NormType type,PetscReal *val)
217 {
218 BV_MAT *ctx = (BV_MAT*)bv->data;
219 const PetscScalar *array,*d_array,*d_A;
220 PetscInt n=0;
221
222 PetscFunctionBegin;
223 if ((j<0 && type==NORM_FROBENIUS && bv->ld==bv->n) || (j>=0 && type==NORM_2)) {
224 /* compute on GPU with hipBLAS */
225 *val = 0.0;
226 if (!bv->n) PetscFunctionReturn(PETSC_SUCCESS);
227 PetscCall(MatDenseHIPGetArrayRead(ctx->A,&d_array));
228 if (PetscUnlikely(j<0)) {
229 d_A = d_array+(bv->nc+bv->l)*bv->ld;
230 n = (bv->k-bv->l)*bv->ld;
231 } else {
232 d_A = d_array+(bv->nc+j)*bv->ld;
233 n = bv->n;
234 }
235 PetscCall(BVNorm_BLAS_HIP(bv,n,d_A,val));
236 PetscCall(MatDenseHIPRestoreArrayRead(ctx->A,&d_array));
237 } else {
238 /* compute on CPU */
239 PetscCall(MatDenseGetArrayRead(ctx->A,&array));
240 if (PetscUnlikely(j<0)) PetscCall(BVNorm_LAPACK_Private(bv,bv->n,bv->k-bv->l,array+(bv->nc+bv->l)*bv->ld,bv->ld,type,val,PETSC_FALSE));
241 else PetscCall(BVNorm_LAPACK_Private(bv,bv->n,1,array+(bv->nc+j)*bv->ld,bv->ld,type,val,PETSC_FALSE));
242 PetscCall(MatDenseRestoreArrayRead(ctx->A,&array));
243 }
244 PetscFunctionReturn(PETSC_SUCCESS);
245 }
246
247 3587117180510011392 PetscErrorCode BVNormalize_Mat_HIP(BV bv,PetscScalar *eigi)
248 {
249 3587117180510011392 BV_MAT *ctx = (BV_MAT*)bv->data;
250 3587117180510011392 PetscScalar *array,*d_array,*wi=NULL;
251
252 PetscFunctionBegin;
253
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
3587117180510011392 if (eigi) wi = eigi+bv->l;
254
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
3587117180510011392 if (!ctx->mpi) {
255 /* compute on GPU with hipBLAS - TODO: include the MPI case here */
256
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
3587117180510011392 if (!bv->n) PetscFunctionReturn(PETSC_SUCCESS);
257
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
3587117180510011392 PetscCall(MatDenseHIPGetArray(ctx->A,&d_array));
258
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
3587117180510011392 PetscCall(BVNormalize_BLAS_HIP(bv,bv->n,bv->k-bv->l,d_array+(bv->nc+bv->l)*bv->ld,bv->ld,wi));
259
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
3587117180510011392 PetscCall(MatDenseHIPRestoreArray(ctx->A,&d_array));
260 3587117180510011392 } else {
261 /* compute on CPU */
262 PetscCall(MatDenseGetArray(ctx->A,&array));
263 PetscCall(BVNormalize_LAPACK_Private(bv,bv->n,bv->k-bv->l,array+(bv->nc+bv->l)*bv->ld,bv->ld,wi,ctx->mpi));
264 PetscCall(MatDenseRestoreArray(ctx->A,&array));
265 }
266 3587117180510011392 PetscFunctionReturn(PETSC_SUCCESS);
267 3587117180510011392 }
268
269 PetscErrorCode BVMatMult_Mat_HIP(BV V,Mat A,BV W)
270 {
271 BV_MAT *v = (BV_MAT*)V->data,*w = (BV_MAT*)W->data;
272 Mat Vmat,Wmat;
273 const PetscScalar *d_pv;
274 PetscScalar *d_pw;
275 PetscInt j;
276
277 PetscFunctionBegin;
278 if (V->vmm) {
279 PetscCall(BVGetMat(V,&Vmat));
280 PetscCall(BVGetMat(W,&Wmat));
281 PetscCall(MatProductCreateWithMat(A,Vmat,NULL,Wmat));
282 PetscCall(MatProductSetType(Wmat,MATPRODUCT_AB));
283 PetscCall(MatProductSetFromOptions(Wmat));
284 PetscCall(MatProductSymbolic(Wmat));
285 PetscCall(MatProductNumeric(Wmat));
286 PetscCall(MatProductClear(Wmat));
287 PetscCall(BVRestoreMat(V,&Vmat));
288 PetscCall(BVRestoreMat(W,&Wmat));
289 } else {
290 PetscCall(MatDenseHIPGetArrayRead(v->A,&d_pv));
291 PetscCall(MatDenseHIPGetArrayWrite(w->A,&d_pw));
292 for (j=0;j<V->k-V->l;j++) {
293 PetscCall(VecHIPPlaceArray(V->cv[1],(PetscScalar *)d_pv+(V->nc+V->l+j)*V->ld));
294 PetscCall(VecHIPPlaceArray(W->cv[1],d_pw+(W->nc+W->l+j)*W->ld));
295 PetscCall(MatMult(A,V->cv[1],W->cv[1]));
296 PetscCall(VecHIPResetArray(V->cv[1]));
297 PetscCall(VecHIPResetArray(W->cv[1]));
298 }
299 PetscCall(MatDenseHIPRestoreArrayRead(v->A,&d_pv));
300 PetscCall(MatDenseHIPRestoreArrayWrite(w->A,&d_pw));
301 }
302 PetscFunctionReturn(PETSC_SUCCESS);
303 }
304
305 PetscErrorCode BVCopy_Mat_HIP(BV V,BV W)
306 {
307 BV_MAT *v = (BV_MAT*)V->data,*w = (BV_MAT*)W->data;
308 const PetscScalar *d_pv;
309 PetscScalar *d_pw;
310
311 PetscFunctionBegin;
312 PetscCall(MatDenseHIPGetArrayRead(v->A,&d_pv));
313 PetscCall(MatDenseHIPGetArray(w->A,&d_pw));
314 PetscCallHIP(hipMemcpy2D(d_pw+(W->nc+W->l)*W->ld,W->ld*sizeof(PetscScalar),d_pv+(V->nc+V->l)*V->ld,V->ld*sizeof(PetscScalar),V->n*sizeof(PetscScalar),V->k-V->l,hipMemcpyDeviceToDevice));
315 PetscCall(MatDenseHIPRestoreArrayRead(v->A,&d_pv));
316 PetscCall(MatDenseHIPRestoreArray(w->A,&d_pw));
317 PetscFunctionReturn(PETSC_SUCCESS);
318 }
319
320 21856518144 PetscErrorCode BVCopyColumn_Mat_HIP(BV V,PetscInt j,PetscInt i)
321 {
322 21856518144 BV_MAT *v = (BV_MAT*)V->data;
323 21856518144 PetscScalar *d_pv;
324
325 PetscFunctionBegin;
326
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
21856518144 PetscCall(MatDenseHIPGetArray(v->A,&d_pv));
327
2/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
21856518144 PetscCallHIP(hipMemcpy(d_pv+(V->nc+i)*V->ld,d_pv+(V->nc+j)*V->ld,V->n*sizeof(PetscScalar),hipMemcpyDeviceToDevice));
328
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
21856518144 PetscCall(MatDenseHIPRestoreArray(v->A,&d_pv));
329 21856518144 PetscFunctionReturn(PETSC_SUCCESS);
330 21856518144 }
331
332 5512249502985068844 PetscErrorCode BVGetColumn_Mat_HIP(BV bv,PetscInt j,Vec*)
333 {
334 5512249502985068844 BV_MAT *ctx = (BV_MAT*)bv->data;
335 5512249502985068844 PetscScalar *d_pv;
336 5512249502985068844 PetscInt l;
337
338 PetscFunctionBegin;
339
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
5512249502985068844 l = BVAvailableVec;
340
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
31381241267216224316 PetscCall(MatDenseHIPGetArray(ctx->A,&d_pv));
341
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 2 times.
5512246880202878916 PetscCall(VecHIPPlaceArray(bv->cv[l],d_pv+(bv->nc+j)*bv->ld));
342 5512246880202878916 PetscFunctionReturn(PETSC_SUCCESS);
343 4215815218975969716 }
344
345 5512246880202878916 PetscErrorCode BVRestoreColumn_Mat_HIP(BV bv,PetscInt j,Vec*)
346 {
347 5512246880202878916 BV_MAT *ctx = (BV_MAT*)bv->data;
348 5512246880202878916 PetscInt l;
349
350 PetscFunctionBegin;
351 5512246880202878916 l = (j==bv->ci[0])? 0: 1;
352
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 2 times.
5512246880202878916 PetscCall(VecHIPResetArray(bv->cv[l]));
353
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 2 times.
5512246880202878916 PetscCall(MatDenseHIPRestoreArray(ctx->A,NULL));
354 5512246880202878916 PetscFunctionReturn(PETSC_SUCCESS);
355 13943877318154818348 }
356
357 PetscErrorCode BVRestoreSplit_Mat_HIP(BV bv,BV *L,BV *R)
358 {
359 Mat A;
360 const PetscScalar *d_pv;
361 PetscObjectState lstate,rstate;
362 PetscBool change=PETSC_FALSE;
363
364 PetscFunctionBegin;
365 /* force sync flag to PETSC_OFFLOAD_BOTH */
366 if (L) {
367 PetscCall(PetscObjectStateGet((PetscObject)*L,&lstate));
368 if (lstate != bv->lstate) {
369 A = ((BV_MAT*)bv->L->data)->A;
370 PetscCall(MatDenseHIPGetArrayRead(A,&d_pv));
371 PetscCall(MatDenseHIPRestoreArrayRead(A,&d_pv));
372 change = PETSC_TRUE;
373 }
374 }
375 if (R) {
376 PetscCall(PetscObjectStateGet((PetscObject)*R,&rstate));
377 if (rstate != bv->rstate) {
378 A = ((BV_MAT*)bv->R->data)->A;
379 PetscCall(MatDenseHIPGetArrayRead(A,&d_pv));
380 PetscCall(MatDenseHIPRestoreArrayRead(A,&d_pv));
381 change = PETSC_TRUE;
382 }
383 }
384 if (change) {
385 A = ((BV_MAT*)bv->data)->A;
386 PetscCall(MatDenseHIPGetArray(A,(PetscScalar **)&d_pv));
387 PetscCall(MatDenseHIPRestoreArray(A,(PetscScalar **)&d_pv));
388 }
389 PetscFunctionReturn(PETSC_SUCCESS);
390 }
391
392 PetscErrorCode BVRestoreSplitRows_Mat_HIP(BV bv,IS,IS,BV *U,BV *L)
393 {
394 Mat A;
395 const PetscScalar *d_pv;
396 PetscObjectState lstate,rstate;
397 PetscBool change=PETSC_FALSE;
398
399 PetscFunctionBegin;
400 /* force sync flag to PETSC_OFFLOAD_BOTH */
401 if (U) {
402 PetscCall(PetscObjectStateGet((PetscObject)*U,&rstate));
403 if (rstate != bv->rstate) {
404 A = ((BV_MAT*)bv->R->data)->A;
405 PetscCall(MatDenseHIPGetArrayRead(A,&d_pv));
406 PetscCall(MatDenseHIPRestoreArrayRead(A,&d_pv));
407 change = PETSC_TRUE;
408 }
409 }
410 if (L) {
411 PetscCall(PetscObjectStateGet((PetscObject)*L,&lstate));
412 if (lstate != bv->lstate) {
413 A = ((BV_MAT*)bv->L->data)->A;
414 PetscCall(MatDenseHIPGetArrayRead(A,&d_pv));
415 PetscCall(MatDenseHIPRestoreArrayRead(A,&d_pv));
416 change = PETSC_TRUE;
417 }
418 }
419 if (change) {
420 A = ((BV_MAT*)bv->data)->A;
421 PetscCall(MatDenseHIPGetArray(A,(PetscScalar **)&d_pv));
422 PetscCall(MatDenseHIPRestoreArray(A,(PetscScalar **)&d_pv));
423 }
424 PetscFunctionReturn(PETSC_SUCCESS);
425 }
426
427 PetscErrorCode BVGetMat_Mat_HIP(BV bv,Mat *A)
428 {
429 BV_MAT *ctx = (BV_MAT*)bv->data;
430 PetscScalar *vv,*aa;
431 PetscBool create=PETSC_FALSE;
432 PetscInt m,cols;
433
434 PetscFunctionBegin;
435 m = bv->k-bv->l;
436 if (!bv->Aget) create=PETSC_TRUE;
437 else {
438 PetscCall(MatDenseHIPGetArray(bv->Aget,&aa));
439 PetscCheck(!aa,PetscObjectComm((PetscObject)bv),PETSC_ERR_ARG_WRONGSTATE,"BVGetMat already called on this BV");
440 PetscCall(MatGetSize(bv->Aget,NULL,&cols));
441 if (cols!=m) {
442 PetscCall(MatDestroy(&bv->Aget));
443 create=PETSC_TRUE;
444 }
445 }
446 PetscCall(MatDenseHIPGetArray(ctx->A,&vv));
447 if (create) {
448 PetscCall(MatCreateDenseFromVecType(PetscObjectComm((PetscObject)bv),bv->vtype,bv->n,PETSC_DECIDE,bv->N,m,bv->ld,vv,&bv->Aget)); /* pass a pointer to avoid allocation of storage */
449 PetscCall(MatDenseHIPReplaceArray(bv->Aget,NULL)); /* replace with a null pointer, the value after BVRestoreMat */
450 }
451 PetscCall(MatDenseHIPPlaceArray(bv->Aget,vv+(bv->nc+bv->l)*bv->ld)); /* set the actual pointer */
452 *A = bv->Aget;
453 PetscFunctionReturn(PETSC_SUCCESS);
454 }
455
456 PetscErrorCode BVRestoreMat_Mat_HIP(BV bv,Mat *A)
457 {
458 BV_MAT *ctx = (BV_MAT*)bv->data;
459 PetscScalar *vv,*aa;
460
461 PetscFunctionBegin;
462 PetscCall(MatDenseHIPGetArray(bv->Aget,&aa));
463 vv = aa-(bv->nc+bv->l)*bv->ld;
464 PetscCall(MatDenseHIPResetArray(bv->Aget));
465 PetscCall(MatDenseHIPRestoreArray(ctx->A,&vv));
466 *A = NULL;
467 PetscFunctionReturn(PETSC_SUCCESS);
468 }
469