GCC Code Coverage Report


Directory: ./
File: src/sys/classes/bv/impls/hip/bvhip.hip.cpp
Date: 2025-12-10 04:20:18
Exec Total Coverage
Lines: 199 370 53.8%
Functions: 12 16 75.0%
Branches: 257 1108 23.2%

Line Branch Exec Source
1 /*
2 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3 SLEPc - Scalable Library for Eigenvalue Problem Computations
4 Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
5
6 This file is part of SLEPc.
7 SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9 */
10 /*
11 HIP-related code common to several BV impls
12 */
13
14 #include <slepc/private/bvimpl.h>
15 #include <slepccupmblas.h>
16
17 #define BLOCKSIZE 64
18
19 /*
20 C := alpha*A*B + beta*C
21 */
22 PetscErrorCode BVMult_BLAS_HIP(BV,PetscInt m_,PetscInt n_,PetscInt k_,PetscScalar alpha,const PetscScalar *d_A,PetscInt lda_,const PetscScalar *d_B,PetscInt ldb_,PetscScalar beta,PetscScalar *d_C,PetscInt ldc_)
23 {
24 PetscHipBLASInt m=0,n=0,k=0,lda=0,ldb=0,ldc=0;
25 hipblasHandle_t hipblashandle;
26
27 PetscFunctionBegin;
28 PetscCall(PetscHIPBLASGetHandle(&hipblashandle));
29 PetscCall(PetscHipBLASIntCast(m_,&m));
30 PetscCall(PetscHipBLASIntCast(n_,&n));
31 PetscCall(PetscHipBLASIntCast(k_,&k));
32 PetscCall(PetscHipBLASIntCast(lda_,&lda));
33 PetscCall(PetscHipBLASIntCast(ldb_,&ldb));
34 PetscCall(PetscHipBLASIntCast(ldc_,&ldc));
35 PetscCall(PetscLogGpuTimeBegin());
36 PetscCallHIPBLAS(hipblasXgemm(hipblashandle,HIPBLAS_OP_N,HIPBLAS_OP_N,m,n,k,&alpha,d_A,lda,d_B,ldb,&beta,d_C,ldc));
37 PetscCall(PetscLogGpuTimeEnd());
38 PetscCall(PetscLogGpuFlops(2.0*m*n*k));
39 PetscFunctionReturn(PETSC_SUCCESS);
40 }
41
42 /*
43 y := alpha*A*x + beta*y
44 */
45 4403601413398079328 PetscErrorCode BVMultVec_BLAS_HIP(BV,PetscInt n_,PetscInt k_,PetscScalar alpha,const PetscScalar *d_A,PetscInt lda_,const PetscScalar *d_x,PetscScalar beta,PetscScalar *d_y)
46 {
47 4403601413398079328 PetscHipBLASInt n=0,k=0,lda=0,one=1;
48 4403601413398079328 hipblasHandle_t hipblashandle;
49
50 PetscFunctionBegin;
51
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
4403601413398079328 PetscCall(PetscHIPBLASGetHandle(&hipblashandle));
52
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
4403601413398079328 PetscCall(PetscHipBLASIntCast(n_,&n));
53
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
4403601413398079328 PetscCall(PetscHipBLASIntCast(k_,&k));
54
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
4403601413398079328 PetscCall(PetscHipBLASIntCast(lda_,&lda));
55
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
4403601413398079328 PetscCall(PetscLogGpuTimeBegin());
56
3/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 1 times.
✓ Branch 7 taken 2 times.
4403601413398079328 PetscCallHIPBLAS(hipblasXgemv(hipblashandle,HIPBLAS_OP_N,n,k,&alpha,d_A,lda,d_x,one,&beta,d_y,one));
57
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
4403601413398079328 PetscCall(PetscLogGpuTimeEnd());
58
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
4403601413398079328 PetscCall(PetscLogGpuFlops(2.0*n*k));
59 4403601413398079328 PetscFunctionReturn(PETSC_SUCCESS);
60 15325409517198366880 }
61
62 /*
63 A(:,s:e-1) := A*B(:,s:e-1)
64 */
65 16227252309694873600 PetscErrorCode BVMultInPlace_BLAS_HIP(BV,PetscInt m_,PetscInt k_,PetscInt s,PetscInt e,PetscScalar *d_A,PetscInt lda_,const PetscScalar *d_B,PetscInt ldb_,PetscBool btrans)
66 {
67 16227252309694873600 const PetscScalar *d_B1;
68 16227252309694873600 PetscScalar *d_work,sone=1.0,szero=0.0;
69 16227252309694873600 PetscHipBLASInt m=0,n=0,k=0,l=0,lda=0,ldb=0,bs=BLOCKSIZE;
70 16227252309694873600 size_t freemem,totmem;
71 16227252309694873600 hipblasHandle_t hipblashandle;
72 16227252309694873600 hipblasOperation_t bt;
73
74 PetscFunctionBegin;
75
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
16227252309694873600 PetscCall(PetscHIPBLASGetHandle(&hipblashandle));
76
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
16227252309694873600 PetscCall(PetscHipBLASIntCast(m_,&m));
77
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
16227252309694873600 PetscCall(PetscHipBLASIntCast(e-s,&n));
78
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
16227252309694873600 PetscCall(PetscHipBLASIntCast(k_,&k));
79
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
16227252309694873600 PetscCall(PetscHipBLASIntCast(lda_,&lda));
80
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
16227252309694873600 PetscCall(PetscHipBLASIntCast(ldb_,&ldb));
81
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
16227252309694873600 PetscCall(PetscLogGpuTimeBegin());
82
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16227252309694873600 if (PetscUnlikely(btrans)) {
83 d_B1 = d_B+s;
84 bt = HIPBLAS_OP_C;
85 } else {
86 16227252309694873600 d_B1 = d_B+s*ldb;
87 16227252309694873600 bt = HIPBLAS_OP_N;
88 }
89 /* try to allocate the whole matrix */
90
2/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
16227252309694873600 PetscCallHIP(hipMemGetInfo(&freemem,&totmem));
91
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
16227252309694873600 if (freemem>=lda*n*sizeof(PetscScalar)) {
92
2/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
16227252309694873600 PetscCallHIP(hipMalloc((void**)&d_work,lda*n*sizeof(PetscScalar)));
93
2/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
16227252309694873600 PetscCallHIPBLAS(hipblasXgemm(hipblashandle,HIPBLAS_OP_N,bt,m,n,k,&sone,d_A,lda,d_B1,ldb,&szero,d_work,lda));
94
2/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
16227252309694873600 PetscCallHIP(hipMemcpy2D(d_A+s*lda,lda*sizeof(PetscScalar),d_work,lda*sizeof(PetscScalar),m*sizeof(PetscScalar),n,hipMemcpyDeviceToDevice));
95 16227252309694873600 } else {
96 PetscCall(PetscHipBLASIntCast(freemem/(m*sizeof(PetscScalar)),&bs));
97 PetscCallHIP(hipMalloc((void**)&d_work,bs*n*sizeof(PetscScalar)));
98 PetscCall(PetscHipBLASIntCast(m % bs,&l));
99 if (l) {
100 PetscCallHIPBLAS(hipblasXgemm(hipblashandle,HIPBLAS_OP_N,bt,l,n,k,&sone,d_A,lda,d_B1,ldb,&szero,d_work,l));
101 PetscCallHIP(hipMemcpy2D(d_A+s*lda,lda*sizeof(PetscScalar),d_work,l*sizeof(PetscScalar),l*sizeof(PetscScalar),n,hipMemcpyDeviceToDevice));
102 }
103 for (;l<m;l+=bs) {
104 PetscCallHIPBLAS(hipblasXgemm(hipblashandle,HIPBLAS_OP_N,bt,bs,n,k,&sone,d_A+l,lda,d_B1,ldb,&szero,d_work,bs));
105 PetscCallHIP(hipMemcpy2D(d_A+l+s*lda,lda*sizeof(PetscScalar),d_work,bs*sizeof(PetscScalar),bs*sizeof(PetscScalar),n,hipMemcpyDeviceToDevice));
106 }
107 }
108
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
16227252309694873600 PetscCall(PetscLogGpuTimeEnd());
109
2/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
16227252309694873600 PetscCallHIP(hipFree(d_work));
110
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
16227252309694873600 PetscCall(PetscLogGpuFlops(2.0*m*n*k));
111 16227252309694873600 PetscFunctionReturn(PETSC_SUCCESS);
112 16227252309694873600 }
113
114 /*
115 B := alpha*A + beta*B
116 */
117 PetscErrorCode BVAXPY_BLAS_HIP(BV,PetscInt n_,PetscInt k_,PetscScalar alpha,const PetscScalar *d_A,PetscInt lda_,PetscScalar beta,PetscScalar *d_B,PetscInt ldb_)
118 {
119 PetscHipBLASInt n=0,k=0,lda=0,ldb=0;
120 hipblasHandle_t hipblashandle;
121
122 PetscFunctionBegin;
123 PetscCall(PetscHIPBLASGetHandle(&hipblashandle));
124 PetscCall(PetscHipBLASIntCast(n_,&n));
125 PetscCall(PetscHipBLASIntCast(k_,&k));
126 PetscCall(PetscHipBLASIntCast(lda_,&lda));
127 PetscCall(PetscHipBLASIntCast(ldb_,&ldb));
128 PetscCall(PetscLogGpuTimeBegin());
129 PetscCallHIPBLAS(hipblasXgeam(hipblashandle,HIPBLAS_OP_N,HIPBLAS_OP_N,n,k,&alpha,d_A,lda,&beta,d_B,ldb,d_B,ldb));
130 PetscCall(PetscLogGpuTimeEnd());
131 PetscCall(PetscLogGpuFlops((beta==(PetscScalar)1.0)?2.0*n*k:3.0*n*k));
132 PetscFunctionReturn(PETSC_SUCCESS);
133 }
134
135 /*
136 C := A'*B
137
138 C is a CPU array
139 */
140 PetscErrorCode BVDot_BLAS_HIP(BV bv,PetscInt m_,PetscInt n_,PetscInt k_,const PetscScalar *d_A,PetscInt lda_,const PetscScalar *d_B,PetscInt ldb_,PetscScalar *C,PetscInt ldc_,PetscBool mpi)
141 {
142 PetscScalar *d_work,sone=1.0,szero=0.0,*CC;
143 PetscInt j;
144 PetscHipBLASInt m=0,n=0,k=0,lda=0,ldb=0,ldc=0;
145 PetscMPIInt len;
146 hipblasHandle_t hipblashandle;
147
148 PetscFunctionBegin;
149 PetscCall(PetscHIPBLASGetHandle(&hipblashandle));
150 PetscCall(PetscHipBLASIntCast(m_,&m));
151 PetscCall(PetscHipBLASIntCast(n_,&n));
152 PetscCall(PetscHipBLASIntCast(k_,&k));
153 PetscCall(PetscHipBLASIntCast(lda_,&lda));
154 PetscCall(PetscHipBLASIntCast(ldb_,&ldb));
155 PetscCall(PetscHipBLASIntCast(ldc_,&ldc));
156 PetscCallHIP(hipMalloc((void**)&d_work,m*n*sizeof(PetscScalar)));
157 if (mpi) {
158 if (ldc==m) {
159 PetscCall(BVAllocateWork_Private(bv,m*n));
160 if (k) {
161 PetscCall(PetscLogGpuTimeBegin());
162 PetscCallHIPBLAS(hipblasXgemm(hipblashandle,HIPBLAS_OP_C,HIPBLAS_OP_N,m,n,k,&sone,d_A,lda,d_B,ldb,&szero,d_work,ldc));
163 PetscCall(PetscLogGpuTimeEnd());
164 PetscCallHIP(hipMemcpy(bv->work,d_work,m*n*sizeof(PetscScalar),hipMemcpyDeviceToHost));
165 PetscCall(PetscLogGpuToCpu(m*n*sizeof(PetscScalar)));
166 } else PetscCall(PetscArrayzero(bv->work,m*n));
167 PetscCall(PetscMPIIntCast(m*n,&len));
168 PetscCallMPI(MPIU_Allreduce(bv->work,C,len,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)bv)));
169 } else {
170 PetscCall(BVAllocateWork_Private(bv,2*m*n));
171 CC = bv->work+m*n;
172 if (k) {
173 PetscCall(PetscLogGpuTimeBegin());
174 PetscCallHIPBLAS(hipblasXgemm(hipblashandle,HIPBLAS_OP_C,HIPBLAS_OP_N,m,n,k,&sone,d_A,lda,d_B,ldb,&szero,d_work,m));
175 PetscCall(PetscLogGpuTimeEnd());
176 PetscCallHIP(hipMemcpy(bv->work,d_work,m*n*sizeof(PetscScalar),hipMemcpyDeviceToHost));
177 PetscCall(PetscLogGpuToCpu(m*n*sizeof(PetscScalar)));
178 } else PetscCall(PetscArrayzero(bv->work,m*n));
179 PetscCall(PetscMPIIntCast(m*n,&len));
180 PetscCallMPI(MPIU_Allreduce(bv->work,CC,len,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)bv)));
181 for (j=0;j<n;j++) PetscCall(PetscArraycpy(C+j*ldc,CC+j*m,m));
182 }
183 } else {
184 if (k) {
185 PetscCall(BVAllocateWork_Private(bv,m*n));
186 PetscCall(PetscLogGpuTimeBegin());
187 PetscCallHIPBLAS(hipblasXgemm(hipblashandle,HIPBLAS_OP_C,HIPBLAS_OP_N,m,n,k,&sone,d_A,lda,d_B,ldb,&szero,d_work,m));
188 PetscCall(PetscLogGpuTimeEnd());
189 PetscCallHIP(hipMemcpy(bv->work,d_work,m*n*sizeof(PetscScalar),hipMemcpyDeviceToHost));
190 PetscCall(PetscLogGpuToCpu(m*n*sizeof(PetscScalar)));
191 for (j=0;j<n;j++) PetscCall(PetscArraycpy(C+j*ldc,bv->work+j*m,m));
192 }
193 }
194 PetscCallHIP(hipFree(d_work));
195 PetscCall(PetscLogGpuFlops(2.0*m*n*k));
196 PetscFunctionReturn(PETSC_SUCCESS);
197 }
198
199 /*
200 y := A'*x
201
202 y is a CPU array, if NULL bv->buffer is used as a workspace
203 */
204 14459712507201182024 PetscErrorCode BVDotVec_BLAS_HIP(BV bv,PetscInt n_,PetscInt k_,const PetscScalar *d_A,PetscInt lda_,const PetscScalar *d_x,PetscScalar *y,PetscBool mpi)
205 {
206 14459712507201182024 PetscScalar *d_work,szero=0.0,sone=1.0,*yy;
207 14459712507201182024 PetscHipBLASInt n=0,k=0,lda=0,one=1;
208 14459712507201182024 PetscMPIInt len;
209 14459712507201182024 hipblasHandle_t hipblashandle;
210
211 PetscFunctionBegin;
212
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
14459712507201182024 PetscCall(PetscHIPBLASGetHandle(&hipblashandle));
213
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
14459712507201182024 PetscCall(PetscHipBLASIntCast(n_,&n));
214
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
14459712507201182024 PetscCall(PetscHipBLASIntCast(k_,&k));
215
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
14459712507201182024 PetscCall(PetscHipBLASIntCast(lda_,&lda));
216
4/6
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✓ Branch 5 taken 2 times.
14459712507201182024 if (!y) PetscCall(VecHIPGetArrayWrite(bv->buffer,&d_work));
217 else PetscCallHIP(hipMalloc((void**)&d_work,k*sizeof(PetscScalar)));
218
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
23683102943695899208 if (mpi) {
219 PetscCall(BVAllocateWork_Private(bv,k));
220 if (n) {
221 PetscCall(PetscLogGpuTimeBegin());
222 PetscCallHIPBLAS(hipblasXgemv(hipblashandle,HIPBLAS_OP_C,n,k,&sone,d_A,lda,d_x,one,&szero,d_work,one));
223 PetscCall(PetscLogGpuTimeEnd());
224 PetscCallHIP(hipMemcpy(bv->work,d_work,k*sizeof(PetscScalar),hipMemcpyDeviceToHost));
225 PetscCall(PetscLogGpuToCpu(k*sizeof(PetscScalar)));
226 } else PetscCall(PetscArrayzero(bv->work,k));
227 /* reduction */
228 PetscCall(PetscMPIIntCast(k,&len));
229 if (!y) {
230 if (use_gpu_aware_mpi) { /* case 1: reduce on GPU using a temporary buffer */
231 PetscCallHIP(hipMalloc((void**)&yy,k*sizeof(PetscScalar)));
232 PetscCallMPI(MPIU_Allreduce(d_work,yy,len,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)bv)));
233 PetscCallHIP(hipMemcpy(d_work,yy,k*sizeof(PetscScalar),hipMemcpyDeviceToDevice));
234 PetscCallHIP(hipFree(yy));
235 } else { /* case 2: reduce on CPU, copy result back to GPU */
236 PetscCall(BVAllocateWork_Private(bv,2*k));
237 yy = bv->work+k;
238 PetscCallHIP(hipMemcpy(bv->work,d_work,k*sizeof(PetscScalar),hipMemcpyDeviceToHost));
239 PetscCall(PetscLogGpuToCpu(k*sizeof(PetscScalar)));
240 PetscCallMPI(MPIU_Allreduce(bv->work,yy,len,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)bv)));
241 PetscCallHIP(hipMemcpy(d_work,yy,k*sizeof(PetscScalar),hipMemcpyHostToDevice));
242 PetscCall(PetscLogCpuToGpu(k*sizeof(PetscScalar)));
243 }
244 PetscCall(VecHIPRestoreArrayWrite(bv->buffer,&d_work));
245 } else { /* case 3: user-provided array y, reduce on CPU */
246 PetscCallHIP(hipFree(d_work));
247 PetscCallMPI(MPIU_Allreduce(bv->work,y,len,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)bv)));
248 }
249 } else {
250
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 2 times.
14459712507201182024 if (n) {
251
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
14459712507201182024 PetscCall(PetscLogGpuTimeBegin());
252
3/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 1 times.
✓ Branch 7 taken 2 times.
14459712507201182024 PetscCallHIPBLAS(hipblasXgemv(hipblashandle,HIPBLAS_OP_C,n,k,&sone,d_A,lda,d_x,one,&szero,d_work,one));
253
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
14459712507201182024 PetscCall(PetscLogGpuTimeEnd());
254 14459712507201182024 }
255
4/6
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✓ Branch 5 taken 2 times.
23683102943695899208 if (!y) PetscCall(VecHIPRestoreArrayWrite(bv->buffer,&d_work));
256 else {
257 PetscCallHIP(hipMemcpy(y,d_work,k*sizeof(PetscScalar),hipMemcpyDeviceToHost));
258 PetscCall(PetscLogGpuToCpu(k*sizeof(PetscScalar)));
259 PetscCallHIP(hipFree(d_work));
260 }
261 }
262
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
14459712507201182024 PetscCall(PetscLogGpuFlops(2.0*n*k));
263 14459712507201182024 PetscFunctionReturn(PETSC_SUCCESS);
264 9848127686793471688 }
265
266 /*
267 Scale n scalars
268 */
269 4511865541610169816 PetscErrorCode BVScale_BLAS_HIP(BV,PetscInt n_,PetscScalar *d_A,PetscScalar alpha)
270 {
271 4511865541610169816 PetscHipBLASInt n=0,one=1;
272 4511865541610169816 hipblasHandle_t hipblashandle;
273
274 PetscFunctionBegin;
275
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 2 times.
4511865541610169816 PetscCall(PetscHipBLASIntCast(n_,&n));
276
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
4511865541610169816 if (PetscUnlikely(alpha == (PetscScalar)0.0)) PetscCallHIP(hipMemset(d_A,0,n*sizeof(PetscScalar)));
277
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
4511865541610169816 else if (alpha != (PetscScalar)1.0) {
278
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 2 times.
4511865541610169816 PetscCall(PetscHIPBLASGetHandle(&hipblashandle));
279
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 2 times.
4511865541610169816 PetscCall(PetscLogGpuTimeBegin());
280
3/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 2 times.
4511865541610169816 PetscCallHIPBLAS(hipblasXscal(hipblashandle,n,&alpha,d_A,one));
281
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 2 times.
4511865541610169816 PetscCall(PetscLogGpuTimeEnd());
282
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 2 times.
4511865541610169816 PetscCall(PetscLogGpuFlops(1.0*n));
283 4511865541610169816 }
284 32381622605808933416 PetscFunctionReturn(PETSC_SUCCESS);
285 21760763893513104376 }
286
287 /*
288 Compute 2-norm of vector consisting of n scalars
289 */
290 216 PetscErrorCode BVNorm_BLAS_HIP(BV,PetscInt n_,const PetscScalar *d_A,PetscReal *nrm)
291 {
292 216 PetscHipBLASInt n=0,one=1;
293 216 hipblasHandle_t hipblashandle;
294
295 PetscFunctionBegin;
296
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
216 PetscCall(PetscHipBLASIntCast(n_,&n));
297
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
216 PetscCall(PetscHIPBLASGetHandle(&hipblashandle));
298
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
216 PetscCall(PetscLogGpuTimeBegin());
299
2/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
216 PetscCallHIPBLAS(hipblasXnrm2(hipblashandle,n,d_A,one,nrm));
300
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
216 PetscCall(PetscLogGpuTimeEnd());
301
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
216 PetscCall(PetscLogGpuFlops(2.0*n));
302 216 PetscFunctionReturn(PETSC_SUCCESS);
303 216 }
304
305 /*
306 Normalize the columns of A
307 */
308 8668499095851106304 PetscErrorCode BVNormalize_BLAS_HIP(BV,PetscInt m_,PetscInt n_,PetscScalar *d_A,PetscInt lda_,PetscScalar *eigi)
309 {
310 8668499095851106304 PetscInt j,k;
311 8668499095851106304 PetscReal nrm,nrm1;
312 8668499095851106304 PetscScalar alpha;
313 8668499095851106304 PetscHipBLASInt m=0,one=1;
314 8668499095851106304 hipblasHandle_t hipblashandle;
315
316 PetscFunctionBegin;
317
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
8668499095851106304 PetscCall(PetscHipBLASIntCast(m_,&m));
318
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
8668499095851106304 PetscCall(PetscHIPBLASGetHandle(&hipblashandle));
319
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
8668499095851106304 PetscCall(PetscLogGpuTimeBegin());
320
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
24895751405545979904 for (j=0;j<n_;j++) {
321 16227252309694873600 k = 1;
322 #if !defined(PETSC_USE_COMPLEX)
323
2/4
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
7003880272840097792 if (eigi && eigi[j] != 0.0) k = 2;
324 #endif
325
2/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
16227252309694873600 PetscCallHIPBLAS(hipblasXnrm2(hipblashandle,m,d_A+j*lda_,one,&nrm));
326
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
16227252309694873600 if (k==2) {
327 PetscCallHIPBLAS(hipblasXnrm2(hipblashandle,m,d_A+(j+1)*lda_,one,&nrm1));
328 nrm = SlepcAbs(nrm,nrm1);
329 }
330 16227252309694873600 alpha = 1.0/nrm;
331
2/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
16227252309694873600 PetscCallHIPBLAS(hipblasXscal(hipblashandle,m,&alpha,d_A+j*lda_,one));
332
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
16227252309694873600 if (k==2) {
333 PetscCallHIPBLAS(hipblasXscal(hipblashandle,m,&alpha,d_A+(j+1)*lda_,one));
334 j++;
335 }
336 16227252309694873600 }
337
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
8668499095851106304 PetscCall(PetscLogGpuTimeEnd());
338
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
8668499095851106304 PetscCall(PetscLogGpuFlops(3.0*m*n_));
339 8668499095851106304 PetscFunctionReturn(PETSC_SUCCESS);
340 8668499095851106304 }
341
342 /*
343 BV_CleanCoefficients_HIP - Sets to zero all entries of column j of the bv buffer
344 */
345 4511865581170845360 PetscErrorCode BV_CleanCoefficients_HIP(BV bv,PetscInt j,PetscScalar *h)
346 {
347 4511865581170845360 PetscScalar *d_hh,*d_a;
348 4511865581170845360 PetscInt i;
349
350 PetscFunctionBegin;
351
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
4511865581170845360 if (!h) {
352
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 2 times.
4511865581170845360 PetscCall(VecHIPGetArray(bv->buffer,&d_a));
353
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
4511865581170845360 PetscCall(PetscLogGpuTimeBegin());
354 4511865581170845360 d_hh = d_a + j*(bv->nc+bv->m);
355
3/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 2 times.
4511865581170845360 PetscCallHIP(hipMemset(d_hh,0,(bv->nc+j)*sizeof(PetscScalar)));
356
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 2 times.
4511865581170845360 PetscCall(PetscLogGpuTimeEnd());
357
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 2 times.
4511865581170845360 PetscCall(VecHIPRestoreArray(bv->buffer,&d_a));
358 4511865581170845360 } else { /* cpu memory */
359 for (i=0;i<bv->nc+j;i++) h[i] = 0.0;
360 }
361 32381622566248257872 PetscFunctionReturn(PETSC_SUCCESS);
362 3713302083118505008 }
363
364 /*
365 BV_AddCoefficients_HIP - Add the contents of the scratch (0-th column) of the bv buffer
366 into column j of the bv buffer
367 */
368 14459712507201182024 PetscErrorCode BV_AddCoefficients_HIP(BV bv,PetscInt j,PetscScalar *h,PetscScalar *c)
369 {
370 14459712507201182024 PetscScalar *d_h,*d_c,sone=1.0;
371 14459712507201182024 PetscInt i;
372 14459712507201182024 PetscHipBLASInt idx=0,one=1;
373 14459712507201182024 hipblasHandle_t hipblashandle;
374
375 PetscFunctionBegin;
376
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
14459712507201182024 if (!h) {
377
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
14459712507201182024 PetscCall(PetscHIPBLASGetHandle(&hipblashandle));
378
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
14459712507201182024 PetscCall(VecHIPGetArray(bv->buffer,&d_c));
379 14459712507201182024 d_h = d_c + j*(bv->nc+bv->m);
380
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
14459712507201182024 PetscCall(PetscHipBLASIntCast(bv->nc+j,&idx));
381
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
14459712507201182024 PetscCall(PetscLogGpuTimeBegin());
382
3/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 1 times.
✓ Branch 7 taken 2 times.
14459712507201182024 PetscCallHIPBLAS(hipblasXaxpy(hipblashandle,idx,&sone,d_c,one,d_h,one));
383
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
14459712507201182024 PetscCall(PetscLogGpuTimeEnd());
384
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
14459712507201182024 PetscCall(PetscLogGpuFlops(1.0*(bv->nc+j)));
385
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
14459712507201182024 PetscCall(VecHIPRestoreArray(bv->buffer,&d_c));
386 14459712507201182024 } else { /* cpu memory */
387 for (i=0;i<bv->nc+j;i++) h[i] += c[i];
388 PetscCall(PetscLogFlops(1.0*(bv->nc+j)));
389 }
390 19071407725448540616 PetscFunctionReturn(PETSC_SUCCESS);
391 14459749306481064776 }
392
393 /*
394 BV_SetValue_HIP - Sets value in row j (counted after the constraints) of column k
395 of the coefficients array
396 */
397 4511865581170845360 PetscErrorCode BV_SetValue_HIP(BV bv,PetscInt j,PetscInt k,PetscScalar *h,PetscScalar value)
398 {
399 4511865581170845360 PetscScalar *d_h,*a;
400
401 PetscFunctionBegin;
402
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
4511865581170845360 if (!h) {
403
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 2 times.
4511865581170845360 PetscCall(VecHIPGetArray(bv->buffer,&a));
404
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 2 times.
4511865581170845360 PetscCall(PetscLogGpuTimeBegin());
405 4511865581170845360 d_h = a + k*(bv->nc+bv->m) + bv->nc+j;
406
3/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 2 times.
4511865581170845360 PetscCallHIP(hipMemcpy(d_h,&value,sizeof(PetscScalar),hipMemcpyHostToDevice));
407
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 2 times.
4511865581170845360 PetscCall(PetscLogCpuToGpu(sizeof(PetscScalar)));
408
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 2 times.
4511865581170845360 PetscCall(PetscLogGpuTimeEnd());
409
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 2 times.
4511865581170845360 PetscCall(VecHIPRestoreArray(bv->buffer,&a));
410 4511865581170845360 } else { /* cpu memory */
411 h[bv->nc+j] = value;
412 }
413 32381622566248257872 PetscFunctionReturn(PETSC_SUCCESS);
414 21760764407801886448 }
415
416 /*
417 BV_SquareSum_HIP - Returns the value h'*h, where h represents the contents of the
418 coefficients array (up to position j)
419 */
420 14459712507201182024 PetscErrorCode BV_SquareSum_HIP(BV bv,PetscInt j,PetscScalar *h,PetscReal *sum)
421 {
422 14459712507201182024 const PetscScalar *d_h;
423 14459712507201182024 PetscScalar dot;
424 14459712507201182024 PetscInt i;
425 14459712507201182024 PetscHipBLASInt idx=0,one=1;
426 14459712507201182024 hipblasHandle_t hipblashandle;
427
428 PetscFunctionBegin;
429
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
14459712507201182024 if (!h) {
430
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
14459712507201182024 PetscCall(PetscHIPBLASGetHandle(&hipblashandle));
431
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
14459712507201182024 PetscCall(VecHIPGetArrayRead(bv->buffer,&d_h));
432
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
14459712507201182024 PetscCall(PetscHipBLASIntCast(bv->nc+j,&idx));
433
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
14459712507201182024 PetscCall(PetscLogGpuTimeBegin());
434
3/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 1 times.
✓ Branch 7 taken 2 times.
14459712507201182024 PetscCallHIPBLAS(hipblasXdot(hipblashandle,idx,d_h,one,d_h,one,&dot));
435
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
14459712507201182024 PetscCall(PetscLogGpuTimeEnd());
436
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
14459712507201182024 PetscCall(PetscLogGpuFlops(2.0*(bv->nc+j)));
437 14459712507201182024 *sum = PetscRealPart(dot);
438
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
14459712507201182024 PetscCall(VecHIPRestoreArrayRead(bv->buffer,&d_h));
439 14459712507201182024 } else { /* cpu memory */
440 *sum = 0.0;
441 for (i=0;i<bv->nc+j;i++) *sum += PetscRealPart(h[i]*PetscConj(h[i]));
442 PetscCall(PetscLogFlops(2.0*(bv->nc+j)));
443 }
444 19071407725448540616 PetscFunctionReturn(PETSC_SUCCESS);
445 14459749306481064776 }
446
447 /* pointwise multiplication */
448 static __global__ void PointwiseMult_kernel(PetscInt xcount,PetscScalar *a,const PetscScalar *b,PetscInt n)
449 {
450 PetscInt x;
451
452 x = xcount*gridDim.x*blockDim.x+blockIdx.x*blockDim.x+threadIdx.x;
453 if (x<n) a[x] *= PetscRealPart(b[x]);
454 }
455
456 /* pointwise division */
457 static __global__ void PointwiseDiv_kernel(PetscInt xcount,PetscScalar *a,const PetscScalar *b,PetscInt n)
458 {
459 PetscInt x;
460
461 x = xcount*gridDim.x*blockDim.x+blockIdx.x*blockDim.x+threadIdx.x;
462 if (x<n) a[x] /= PetscRealPart(b[x]);
463 }
464
465 /*
466 BV_ApplySignature_HIP - Computes the pointwise product h*omega, where h represents
467 the contents of the coefficients array (up to position j) and omega is the signature;
468 if inverse=TRUE then the operation is h/omega
469 */
470 PetscErrorCode BV_ApplySignature_HIP(BV bv,PetscInt j,PetscScalar *h,PetscBool inverse)
471 {
472 PetscScalar *d_h;
473 const PetscScalar *d_omega,*omega;
474 PetscInt i,xcount;
475 dim3 blocks3d, threads3d;
476
477 PetscFunctionBegin;
478 if (!(bv->nc+j)) PetscFunctionReturn(PETSC_SUCCESS);
479 if (!h) {
480 PetscCall(VecHIPGetArray(bv->buffer,&d_h));
481 PetscCall(VecHIPGetArrayRead(bv->omega,&d_omega));
482 PetscCall(SlepcKernelSetGrid1D(bv->nc+j,&blocks3d,&threads3d,&xcount));
483 PetscCall(PetscLogGpuTimeBegin());
484 if (inverse) {
485 for (i=0;i<xcount;i++) PointwiseDiv_kernel<<<blocks3d,threads3d,0,0>>>(i,d_h,d_omega,bv->nc+j);
486 } else {
487 for (i=0;i<xcount;i++) PointwiseMult_kernel<<<blocks3d,threads3d,0,0>>>(i,d_h,d_omega,bv->nc+j);
488 }
489 PetscCallHIP(hipGetLastError());
490 PetscCall(PetscLogGpuTimeEnd());
491 PetscCall(PetscLogGpuFlops(1.0*(bv->nc+j)));
492 PetscCall(VecHIPRestoreArrayRead(bv->omega,&d_omega));
493 PetscCall(VecHIPRestoreArray(bv->buffer,&d_h));
494 } else {
495 PetscCall(VecGetArrayRead(bv->omega,&omega));
496 if (inverse) for (i=0;i<bv->nc+j;i++) h[i] /= PetscRealPart(omega[i]);
497 else for (i=0;i<bv->nc+j;i++) h[i] *= PetscRealPart(omega[i]);
498 PetscCall(VecRestoreArrayRead(bv->omega,&omega));
499 PetscCall(PetscLogFlops(1.0*(bv->nc+j)));
500 }
501 PetscFunctionReturn(PETSC_SUCCESS);
502 }
503
504 /*
505 BV_SquareRoot_HIP - Returns the square root of position j (counted after the constraints)
506 of the coefficients array
507 */
508 14459712507201182024 PetscErrorCode BV_SquareRoot_HIP(BV bv,PetscInt j,PetscScalar *h,PetscReal *beta)
509 {
510 14459712507201182024 const PetscScalar *d_h;
511 14459712507201182024 PetscScalar hh;
512
513 PetscFunctionBegin;
514
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
14459712507201182024 if (!h) {
515
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
14459712507201182024 PetscCall(VecHIPGetArrayRead(bv->buffer,&d_h));
516
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
14459712507201182024 PetscCall(PetscLogGpuTimeBegin());
517
3/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 1 times.
✓ Branch 7 taken 2 times.
14459712507201182024 PetscCallHIP(hipMemcpy(&hh,d_h+bv->nc+j,sizeof(PetscScalar),hipMemcpyDeviceToHost));
518
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
14459712507201182024 PetscCall(PetscLogGpuToCpu(sizeof(PetscScalar)));
519
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
14459712507201182024 PetscCall(PetscLogGpuTimeEnd());
520
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
14459712507201182024 PetscCall(BV_SafeSqrt(bv,hh,beta));
521
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
14459712507201182024 PetscCall(VecHIPRestoreArrayRead(bv->buffer,&d_h));
522
0/4
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
14459712507201182024 } else PetscCall(BV_SafeSqrt(bv,h[bv->nc+j],beta));
523 19071407725448540616 PetscFunctionReturn(PETSC_SUCCESS);
524 9848090887513588936 }
525
526 /*
527 BV_StoreCoefficients_HIP - Copy the contents of the coefficients array to an array dest
528 provided by the caller (only values from l to j are copied)
529 */
530 4511867954811371520 PetscErrorCode BV_StoreCoefficients_HIP(BV bv,PetscInt j,PetscScalar *h,PetscScalar *dest)
531 {
532 4511867954811371520 const PetscScalar *d_h,*d_a;
533 4511867954811371520 PetscInt i;
534
535 PetscFunctionBegin;
536
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
4511867954811371520 if (!h) {
537
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 2 times.
4511867954811371520 PetscCall(VecHIPGetArrayRead(bv->buffer,&d_a));
538
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
4511867954811371520 PetscCall(PetscLogGpuTimeBegin());
539 4511867954811371520 d_h = d_a + j*(bv->nc+bv->m)+bv->nc;
540
3/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✓ Branch 7 taken 2 times.
4511867954811371520 PetscCallHIP(hipMemcpy(dest-bv->l,d_h,(j-bv->l)*sizeof(PetscScalar),hipMemcpyDeviceToHost));
541
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 2 times.
4511867954811371520 PetscCall(PetscLogGpuToCpu((j-bv->l)*sizeof(PetscScalar)));
542
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 2 times.
4511867954811371520 PetscCall(PetscLogGpuTimeEnd());
543
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
4511867954811371520 PetscCall(VecHIPRestoreArrayRead(bv->buffer,&d_a));
544 4511867954811371520 } else {
545 for (i=bv->l;i<j;i++) dest[i-bv->l] = h[bv->nc+i];
546 }
547 32381620192607731712 PetscFunctionReturn(PETSC_SUCCESS);
548 3713323445883240448 }
549