GCC Code Coverage Report


Directory: ./
File: src/sys/classes/bv/impls/hip/bvhip.hip.cpp
Date: 2025-10-04 04:19:13
Exec Total Coverage
Lines: 199 370 53.8%
Functions: 12 16 75.0%
Branches: 230 1108 20.8%

Line Branch Exec Source
1 /*
2 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3 SLEPc - Scalable Library for Eigenvalue Problem Computations
4 Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
5
6 This file is part of SLEPc.
7 SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9 */
10 /*
11 HIP-related code common to several BV impls
12 */
13
14 #include <slepc/private/bvimpl.h>
15 #include <slepccupmblas.h>
16
17 #define BLOCKSIZE 64
18
19 /*
20 C := alpha*A*B + beta*C
21 */
22 PetscErrorCode BVMult_BLAS_HIP(BV,PetscInt m_,PetscInt n_,PetscInt k_,PetscScalar alpha,const PetscScalar *d_A,PetscInt lda_,const PetscScalar *d_B,PetscInt ldb_,PetscScalar beta,PetscScalar *d_C,PetscInt ldc_)
23 {
24 PetscHipBLASInt m=0,n=0,k=0,lda=0,ldb=0,ldc=0;
25 hipblasHandle_t hipblashandle;
26
27 PetscFunctionBegin;
28 PetscCall(PetscHIPBLASGetHandle(&hipblashandle));
29 PetscCall(PetscHipBLASIntCast(m_,&m));
30 PetscCall(PetscHipBLASIntCast(n_,&n));
31 PetscCall(PetscHipBLASIntCast(k_,&k));
32 PetscCall(PetscHipBLASIntCast(lda_,&lda));
33 PetscCall(PetscHipBLASIntCast(ldb_,&ldb));
34 PetscCall(PetscHipBLASIntCast(ldc_,&ldc));
35 PetscCall(PetscLogGpuTimeBegin());
36 PetscCallHIPBLAS(hipblasXgemm(hipblashandle,HIPBLAS_OP_N,HIPBLAS_OP_N,m,n,k,&alpha,d_A,lda,d_B,ldb,&beta,d_C,ldc));
37 PetscCall(PetscLogGpuTimeEnd());
38 PetscCall(PetscLogGpuFlops(2.0*m*n*k));
39 PetscFunctionReturn(PETSC_SUCCESS);
40 }
41
42 /*
43 y := alpha*A*x + beta*y
44 */
45 8200246420960204592 PetscErrorCode BVMultVec_BLAS_HIP(BV,PetscInt n_,PetscInt k_,PetscScalar alpha,const PetscScalar *d_A,PetscInt lda_,const PetscScalar *d_x,PetscScalar beta,PetscScalar *d_y)
46 {
47 8200246420960204592 PetscHipBLASInt n=0,k=0,lda=0,one=1;
48 8200246420960204592 hipblasHandle_t hipblashandle;
49
50 PetscFunctionBegin;
51
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
8200246420960204592 PetscCall(PetscHIPBLASGetHandle(&hipblashandle));
52
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
8200246420960204592 PetscCall(PetscHipBLASIntCast(n_,&n));
53
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
8200246420960204592 PetscCall(PetscHipBLASIntCast(k_,&k));
54
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
8200246420960204592 PetscCall(PetscHipBLASIntCast(lda_,&lda));
55
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
8200246420960204592 PetscCall(PetscLogGpuTimeBegin());
56
3/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 1 times.
✓ Branch 7 taken 2 times.
8200246420960204592 PetscCallHIPBLAS(hipblasXgemv(hipblashandle,HIPBLAS_OP_N,n,k,&alpha,d_A,lda,d_x,one,&beta,d_y,one));
57
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
8200246420960204592 PetscCall(PetscLogGpuTimeEnd());
58
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
8200246420960204592 PetscCall(PetscLogGpuFlops(2.0*n*k));
59 8200246420960204592 PetscFunctionReturn(PETSC_SUCCESS);
60 8170016003200158160 }
61
62 /*
63 A(:,s:e-1) := A*B(:,s:e-1)
64 */
65 1981614622368595968 PetscErrorCode BVMultInPlace_BLAS_HIP(BV,PetscInt m_,PetscInt k_,PetscInt s,PetscInt e,PetscScalar *d_A,PetscInt lda_,const PetscScalar *d_B,PetscInt ldb_,PetscBool btrans)
66 {
67 1981614622368595968 const PetscScalar *d_B1;
68 1981614622368595968 PetscScalar *d_work,sone=1.0,szero=0.0;
69 1981614622368595968 PetscHipBLASInt m=0,n=0,k=0,l=0,lda=0,ldb=0,bs=BLOCKSIZE;
70 1981614622368595968 size_t freemem,totmem;
71 1981614622368595968 hipblasHandle_t hipblashandle;
72 1981614622368595968 hipblasOperation_t bt;
73
74 PetscFunctionBegin;
75
2/4
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 1 times.
1981614622368595968 PetscCall(PetscHIPBLASGetHandle(&hipblashandle));
76
2/4
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 1 times.
1981614622368595968 PetscCall(PetscHipBLASIntCast(m_,&m));
77
2/4
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 1 times.
1981614622368595968 PetscCall(PetscHipBLASIntCast(e-s,&n));
78
2/4
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 1 times.
1981614622368595968 PetscCall(PetscHipBLASIntCast(k_,&k));
79
2/4
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 1 times.
1981614622368595968 PetscCall(PetscHipBLASIntCast(lda_,&lda));
80
2/4
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 1 times.
1981614622368595968 PetscCall(PetscHipBLASIntCast(ldb_,&ldb));
81
2/4
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 1 times.
1981614622368595968 PetscCall(PetscLogGpuTimeBegin());
82
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1981614622368595968 if (PetscUnlikely(btrans)) {
83 d_B1 = d_B+s;
84 bt = HIPBLAS_OP_C;
85 } else {
86 1981614622368595968 d_B1 = d_B+s*ldb;
87 1981614622368595968 bt = HIPBLAS_OP_N;
88 }
89 /* try to allocate the whole matrix */
90
2/8
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
1981614622368595968 PetscCallHIP(hipMemGetInfo(&freemem,&totmem));
91
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1981614622368595968 if (freemem>=lda*n*sizeof(PetscScalar)) {
92
2/8
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
1981614622368595968 PetscCallHIP(hipMalloc((void**)&d_work,lda*n*sizeof(PetscScalar)));
93
2/8
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
1981614622368595968 PetscCallHIPBLAS(hipblasXgemm(hipblashandle,HIPBLAS_OP_N,bt,m,n,k,&sone,d_A,lda,d_B1,ldb,&szero,d_work,lda));
94
2/8
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
1981614622368595968 PetscCallHIP(hipMemcpy2D(d_A+s*lda,lda*sizeof(PetscScalar),d_work,lda*sizeof(PetscScalar),m*sizeof(PetscScalar),n,hipMemcpyDeviceToDevice));
95 1981614622368595968 } else {
96 PetscCall(PetscHipBLASIntCast(freemem/(m*sizeof(PetscScalar)),&bs));
97 PetscCallHIP(hipMalloc((void**)&d_work,bs*n*sizeof(PetscScalar)));
98 PetscCall(PetscHipBLASIntCast(m % bs,&l));
99 if (l) {
100 PetscCallHIPBLAS(hipblasXgemm(hipblashandle,HIPBLAS_OP_N,bt,l,n,k,&sone,d_A,lda,d_B1,ldb,&szero,d_work,l));
101 PetscCallHIP(hipMemcpy2D(d_A+s*lda,lda*sizeof(PetscScalar),d_work,l*sizeof(PetscScalar),l*sizeof(PetscScalar),n,hipMemcpyDeviceToDevice));
102 }
103 for (;l<m;l+=bs) {
104 PetscCallHIPBLAS(hipblasXgemm(hipblashandle,HIPBLAS_OP_N,bt,bs,n,k,&sone,d_A+l,lda,d_B1,ldb,&szero,d_work,bs));
105 PetscCallHIP(hipMemcpy2D(d_A+l+s*lda,lda*sizeof(PetscScalar),d_work,bs*sizeof(PetscScalar),bs*sizeof(PetscScalar),n,hipMemcpyDeviceToDevice));
106 }
107 }
108
2/4
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 1 times.
1981614622368595968 PetscCall(PetscLogGpuTimeEnd());
109
2/8
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
1981614622368595968 PetscCallHIP(hipFree(d_work));
110
2/4
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 1 times.
1981614622368595968 PetscCall(PetscLogGpuFlops(2.0*m*n*k));
111 1981614622368595968 PetscFunctionReturn(PETSC_SUCCESS);
112 1981614622368595968 }
113
114 /*
115 B := alpha*A + beta*B
116 */
117 PetscErrorCode BVAXPY_BLAS_HIP(BV,PetscInt n_,PetscInt k_,PetscScalar alpha,const PetscScalar *d_A,PetscInt lda_,PetscScalar beta,PetscScalar *d_B,PetscInt ldb_)
118 {
119 PetscHipBLASInt n=0,k=0,lda=0,ldb=0;
120 hipblasHandle_t hipblashandle;
121
122 PetscFunctionBegin;
123 PetscCall(PetscHIPBLASGetHandle(&hipblashandle));
124 PetscCall(PetscHipBLASIntCast(n_,&n));
125 PetscCall(PetscHipBLASIntCast(k_,&k));
126 PetscCall(PetscHipBLASIntCast(lda_,&lda));
127 PetscCall(PetscHipBLASIntCast(ldb_,&ldb));
128 PetscCall(PetscLogGpuTimeBegin());
129 PetscCallHIPBLAS(hipblasXgeam(hipblashandle,HIPBLAS_OP_N,HIPBLAS_OP_N,n,k,&alpha,d_A,lda,&beta,d_B,ldb,d_B,ldb));
130 PetscCall(PetscLogGpuTimeEnd());
131 PetscCall(PetscLogGpuFlops((beta==(PetscScalar)1.0)?2.0*n*k:3.0*n*k));
132 PetscFunctionReturn(PETSC_SUCCESS);
133 }
134
135 /*
136 C := A'*B
137
138 C is a CPU array
139 */
140 PetscErrorCode BVDot_BLAS_HIP(BV bv,PetscInt m_,PetscInt n_,PetscInt k_,const PetscScalar *d_A,PetscInt lda_,const PetscScalar *d_B,PetscInt ldb_,PetscScalar *C,PetscInt ldc_,PetscBool mpi)
141 {
142 PetscScalar *d_work,sone=1.0,szero=0.0,*CC;
143 PetscInt j;
144 PetscHipBLASInt m=0,n=0,k=0,lda=0,ldb=0,ldc=0;
145 PetscMPIInt len;
146 hipblasHandle_t hipblashandle;
147
148 PetscFunctionBegin;
149 PetscCall(PetscHIPBLASGetHandle(&hipblashandle));
150 PetscCall(PetscHipBLASIntCast(m_,&m));
151 PetscCall(PetscHipBLASIntCast(n_,&n));
152 PetscCall(PetscHipBLASIntCast(k_,&k));
153 PetscCall(PetscHipBLASIntCast(lda_,&lda));
154 PetscCall(PetscHipBLASIntCast(ldb_,&ldb));
155 PetscCall(PetscHipBLASIntCast(ldc_,&ldc));
156 PetscCallHIP(hipMalloc((void**)&d_work,m*n*sizeof(PetscScalar)));
157 if (mpi) {
158 if (ldc==m) {
159 PetscCall(BVAllocateWork_Private(bv,m*n));
160 if (k) {
161 PetscCall(PetscLogGpuTimeBegin());
162 PetscCallHIPBLAS(hipblasXgemm(hipblashandle,HIPBLAS_OP_C,HIPBLAS_OP_N,m,n,k,&sone,d_A,lda,d_B,ldb,&szero,d_work,ldc));
163 PetscCall(PetscLogGpuTimeEnd());
164 PetscCallHIP(hipMemcpy(bv->work,d_work,m*n*sizeof(PetscScalar),hipMemcpyDeviceToHost));
165 PetscCall(PetscLogGpuToCpu(m*n*sizeof(PetscScalar)));
166 } else PetscCall(PetscArrayzero(bv->work,m*n));
167 PetscCall(PetscMPIIntCast(m*n,&len));
168 PetscCallMPI(MPIU_Allreduce(bv->work,C,len,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)bv)));
169 } else {
170 PetscCall(BVAllocateWork_Private(bv,2*m*n));
171 CC = bv->work+m*n;
172 if (k) {
173 PetscCall(PetscLogGpuTimeBegin());
174 PetscCallHIPBLAS(hipblasXgemm(hipblashandle,HIPBLAS_OP_C,HIPBLAS_OP_N,m,n,k,&sone,d_A,lda,d_B,ldb,&szero,d_work,m));
175 PetscCall(PetscLogGpuTimeEnd());
176 PetscCallHIP(hipMemcpy(bv->work,d_work,m*n*sizeof(PetscScalar),hipMemcpyDeviceToHost));
177 PetscCall(PetscLogGpuToCpu(m*n*sizeof(PetscScalar)));
178 } else PetscCall(PetscArrayzero(bv->work,m*n));
179 PetscCall(PetscMPIIntCast(m*n,&len));
180 PetscCallMPI(MPIU_Allreduce(bv->work,CC,len,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)bv)));
181 for (j=0;j<n;j++) PetscCall(PetscArraycpy(C+j*ldc,CC+j*m,m));
182 }
183 } else {
184 if (k) {
185 PetscCall(BVAllocateWork_Private(bv,m*n));
186 PetscCall(PetscLogGpuTimeBegin());
187 PetscCallHIPBLAS(hipblasXgemm(hipblashandle,HIPBLAS_OP_C,HIPBLAS_OP_N,m,n,k,&sone,d_A,lda,d_B,ldb,&szero,d_work,m));
188 PetscCall(PetscLogGpuTimeEnd());
189 PetscCallHIP(hipMemcpy(bv->work,d_work,m*n*sizeof(PetscScalar),hipMemcpyDeviceToHost));
190 PetscCall(PetscLogGpuToCpu(m*n*sizeof(PetscScalar)));
191 for (j=0;j<n;j++) PetscCall(PetscArraycpy(C+j*ldc,bv->work+j*m,m));
192 }
193 }
194 PetscCallHIP(hipFree(d_work));
195 PetscCall(PetscLogGpuFlops(2.0*m*n*k));
196 PetscFunctionReturn(PETSC_SUCCESS);
197 }
198
199 /*
200 y := A'*x
201
202 y is a CPU array, if NULL bv->buffer is used as a workspace
203 */
204 3576595478344653636 PetscErrorCode BVDotVec_BLAS_HIP(BV bv,PetscInt n_,PetscInt k_,const PetscScalar *d_A,PetscInt lda_,const PetscScalar *d_x,PetscScalar *y,PetscBool mpi)
205 {
206 3576595478344653636 PetscScalar *d_work,szero=0.0,sone=1.0,*yy;
207 3576595478344653636 PetscHipBLASInt n=0,k=0,lda=0,one=1;
208 3576595478344653636 PetscMPIInt len;
209 3576595478344653636 hipblasHandle_t hipblashandle;
210
211 PetscFunctionBegin;
212
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
3576595478344653636 PetscCall(PetscHIPBLASGetHandle(&hipblashandle));
213
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
3576595478344653636 PetscCall(PetscHipBLASIntCast(n_,&n));
214
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
3576595478344653636 PetscCall(PetscHipBLASIntCast(k_,&k));
215
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
3576595478344653636 PetscCall(PetscHipBLASIntCast(lda_,&lda));
216
3/6
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
3576595478344653636 if (!y) PetscCall(VecHIPGetArrayWrite(bv->buffer,&d_work));
217 else PetscCallHIP(hipMalloc((void**)&d_work,k*sizeof(PetscScalar)));
218
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
3576595478344653636 if (mpi) {
219 PetscCall(BVAllocateWork_Private(bv,k));
220 if (n) {
221 PetscCall(PetscLogGpuTimeBegin());
222 PetscCallHIPBLAS(hipblasXgemv(hipblashandle,HIPBLAS_OP_C,n,k,&sone,d_A,lda,d_x,one,&szero,d_work,one));
223 PetscCall(PetscLogGpuTimeEnd());
224 PetscCallHIP(hipMemcpy(bv->work,d_work,k*sizeof(PetscScalar),hipMemcpyDeviceToHost));
225 PetscCall(PetscLogGpuToCpu(k*sizeof(PetscScalar)));
226 } else PetscCall(PetscArrayzero(bv->work,k));
227 /* reduction */
228 PetscCall(PetscMPIIntCast(k,&len));
229 if (!y) {
230 if (use_gpu_aware_mpi) { /* case 1: reduce on GPU using a temporary buffer */
231 PetscCallHIP(hipMalloc((void**)&yy,k*sizeof(PetscScalar)));
232 PetscCallMPI(MPIU_Allreduce(d_work,yy,len,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)bv)));
233 PetscCallHIP(hipMemcpy(d_work,yy,k*sizeof(PetscScalar),hipMemcpyDeviceToDevice));
234 PetscCallHIP(hipFree(yy));
235 } else { /* case 2: reduce on CPU, copy result back to GPU */
236 PetscCall(BVAllocateWork_Private(bv,2*k));
237 yy = bv->work+k;
238 PetscCallHIP(hipMemcpy(bv->work,d_work,k*sizeof(PetscScalar),hipMemcpyDeviceToHost));
239 PetscCall(PetscLogGpuToCpu(k*sizeof(PetscScalar)));
240 PetscCallMPI(MPIU_Allreduce(bv->work,yy,len,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)bv)));
241 PetscCallHIP(hipMemcpy(d_work,yy,k*sizeof(PetscScalar),hipMemcpyHostToDevice));
242 PetscCall(PetscLogCpuToGpu(k*sizeof(PetscScalar)));
243 }
244 PetscCall(VecHIPRestoreArrayWrite(bv->buffer,&d_work));
245 } else { /* case 3: user-provided array y, reduce on CPU */
246 PetscCallHIP(hipFree(d_work));
247 PetscCallMPI(MPIU_Allreduce(bv->work,y,len,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)bv)));
248 }
249 } else {
250
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
3576595478344653636 if (n) {
251
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
3576595478344653636 PetscCall(PetscLogGpuTimeBegin());
252
2/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
3576595478344653636 PetscCallHIPBLAS(hipblasXgemv(hipblashandle,HIPBLAS_OP_C,n,k,&sone,d_A,lda,d_x,one,&szero,d_work,one));
253
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
3576595478344653636 PetscCall(PetscLogGpuTimeEnd());
254 3576595478344653636 }
255
3/6
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
3576595478344653636 if (!y) PetscCall(VecHIPRestoreArrayWrite(bv->buffer,&d_work));
256 else {
257 PetscCallHIP(hipMemcpy(y,d_work,k*sizeof(PetscScalar),hipMemcpyDeviceToHost));
258 PetscCall(PetscLogGpuToCpu(k*sizeof(PetscScalar)));
259 PetscCallHIP(hipFree(d_work));
260 }
261 }
262
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
3576595478344653636 PetscCall(PetscLogGpuFlops(2.0*n*k));
263 3576595478344653636 PetscFunctionReturn(PETSC_SUCCESS);
264 3576595478344653636 }
265
266 /*
267 Scale n scalars
268 */
269 4881539584850144524 PetscErrorCode BVScale_BLAS_HIP(BV,PetscInt n_,PetscScalar *d_A,PetscScalar alpha)
270 {
271 4881539584850144524 PetscHipBLASInt n=0,one=1;
272 4881539584850144524 hipblasHandle_t hipblashandle;
273
274 PetscFunctionBegin;
275
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
4881539584850144524 PetscCall(PetscHipBLASIntCast(n_,&n));
276
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
4881539584850144524 if (PetscUnlikely(alpha == (PetscScalar)0.0)) PetscCallHIP(hipMemset(d_A,0,n*sizeof(PetscScalar)));
277
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
4881539584850144524 else if (alpha != (PetscScalar)1.0) {
278
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
4881539584850144524 PetscCall(PetscHIPBLASGetHandle(&hipblashandle));
279
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
4881539584850144524 PetscCall(PetscLogGpuTimeBegin());
280
3/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 1 times.
✓ Branch 7 taken 2 times.
4881539584850144524 PetscCallHIPBLAS(hipblasXscal(hipblashandle,n,&alpha,d_A,one));
281
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
4881539584850144524 PetscCall(PetscLogGpuTimeEnd());
282
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
4881539584850144524 PetscCall(PetscLogGpuFlops(1.0*n));
283 4881539584850144524 }
284 13565218636481703924 PetscFunctionReturn(PETSC_SUCCESS);
285 1643381673510846076 }
286
287 /*
288 Compute 2-norm of vector consisting of n scalars
289 */
290 396 PetscErrorCode BVNorm_BLAS_HIP(BV,PetscInt n_,const PetscScalar *d_A,PetscReal *nrm)
291 {
292 396 PetscHipBLASInt n=0,one=1;
293 396 hipblasHandle_t hipblashandle;
294
295 PetscFunctionBegin;
296
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
396 PetscCall(PetscHipBLASIntCast(n_,&n));
297
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
396 PetscCall(PetscHIPBLASGetHandle(&hipblashandle));
298
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
396 PetscCall(PetscLogGpuTimeBegin());
299
2/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
396 PetscCallHIPBLAS(hipblasXnrm2(hipblashandle,n,d_A,one,nrm));
300
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
396 PetscCall(PetscLogGpuTimeEnd());
301
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
396 PetscCall(PetscLogGpuFlops(2.0*n));
302 396 PetscFunctionReturn(PETSC_SUCCESS);
303 396 }
304
305 /*
306 Normalize the columns of A
307 */
308 5107089674019536896 PetscErrorCode BVNormalize_BLAS_HIP(BV,PetscInt m_,PetscInt n_,PetscScalar *d_A,PetscInt lda_,PetscScalar *eigi)
309 {
310 5107089674019536896 PetscInt j,k;
311 5107089674019536896 PetscReal nrm,nrm1;
312 5107089674019536896 PetscScalar alpha;
313 5107089674019536896 PetscHipBLASInt m=0,one=1;
314 5107089674019536896 hipblasHandle_t hipblashandle;
315
316 PetscFunctionBegin;
317
2/4
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 1 times.
5107089674019536896 PetscCall(PetscHipBLASIntCast(m_,&m));
318
2/4
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 1 times.
5107089674019536896 PetscCall(PetscHIPBLASGetHandle(&hipblashandle));
319
2/4
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 1 times.
5107089674019536896 PetscCall(PetscLogGpuTimeBegin());
320
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
7088704296388132864 for (j=0;j<n_;j++) {
321 1981614622368595968 k = 1;
322 #if !defined(PETSC_USE_COMPLEX)
323
2/4
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
1981614622368595968 if (eigi && eigi[j] != 0.0) k = 2;
324 #endif
325
2/8
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
1981614622368595968 PetscCallHIPBLAS(hipblasXnrm2(hipblashandle,m,d_A+j*lda_,one,&nrm));
326
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1981614622368595968 if (k==2) {
327 PetscCallHIPBLAS(hipblasXnrm2(hipblashandle,m,d_A+(j+1)*lda_,one,&nrm1));
328 nrm = SlepcAbs(nrm,nrm1);
329 }
330 1981614622368595968 alpha = 1.0/nrm;
331
2/8
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
1981614622368595968 PetscCallHIPBLAS(hipblasXscal(hipblashandle,m,&alpha,d_A+j*lda_,one));
332
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1981614622368595968 if (k==2) {
333 PetscCallHIPBLAS(hipblasXscal(hipblashandle,m,&alpha,d_A+(j+1)*lda_,one));
334 j++;
335 }
336 1981614622368595968 }
337
2/4
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 1 times.
5107089674019536896 PetscCall(PetscLogGpuTimeEnd());
338
2/4
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 1 times.
5107089674019536896 PetscCall(PetscLogGpuFlops(3.0*m*n_));
339 5107089674019536896 PetscFunctionReturn(PETSC_SUCCESS);
340 5107089674019536896 }
341
342 /*
343 BV_CleanCoefficients_HIP - Sets to zero all entries of column j of the bv buffer
344 */
345 4881539469691333528 PetscErrorCode BV_CleanCoefficients_HIP(BV bv,PetscInt j,PetscScalar *h)
346 {
347 4881539469691333528 PetscScalar *d_hh,*d_a;
348 4881539469691333528 PetscInt i;
349
350 PetscFunctionBegin;
351
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
4881539469691333528 if (!h) {
352
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
4881539469691333528 PetscCall(VecHIPGetArray(bv->buffer,&d_a));
353
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
4881539469691333528 PetscCall(PetscLogGpuTimeBegin());
354 4881539469691333528 d_hh = d_a + j*(bv->nc+bv->m);
355
3/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 1 times.
✓ Branch 7 taken 2 times.
4881539469691333528 PetscCallHIP(hipMemset(d_hh,0,(bv->nc+j)*sizeof(PetscScalar)));
356
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
4881539469691333528 PetscCall(PetscLogGpuTimeEnd());
357
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
4881539469691333528 PetscCall(VecHIPRestoreArray(bv->buffer,&d_a));
358 4881539469691333528 } else { /* cpu memory */
359 for (i=0;i<bv->nc+j;i++) h[i] = 0.0;
360 }
361 13565218519712280168 PetscFunctionReturn(PETSC_SUCCESS);
362 2722767522356016856 }
363
364 /*
365 BV_AddCoefficients_HIP - Add the contents of the scratch (0-th column) of the bv buffer
366 into column j of the bv buffer
367 */
368 3576595478344653636 PetscErrorCode BV_AddCoefficients_HIP(BV bv,PetscInt j,PetscScalar *h,PetscScalar *c)
369 {
370 3576595478344653636 PetscScalar *d_h,*d_c,sone=1.0;
371 3576595478344653636 PetscInt i;
372 3576595478344653636 PetscHipBLASInt idx=0,one=1;
373 3576595478344653636 hipblasHandle_t hipblashandle;
374
375 PetscFunctionBegin;
376
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
3576595478344653636 if (!h) {
377
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
3576595478344653636 PetscCall(PetscHIPBLASGetHandle(&hipblashandle));
378
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
3576595478344653636 PetscCall(VecHIPGetArray(bv->buffer,&d_c));
379 3576595478344653636 d_h = d_c + j*(bv->nc+bv->m);
380
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
3576595478344653636 PetscCall(PetscHipBLASIntCast(bv->nc+j,&idx));
381
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
3576595478344653636 PetscCall(PetscLogGpuTimeBegin());
382
2/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
3576595478344653636 PetscCallHIPBLAS(hipblasXaxpy(hipblashandle,idx,&sone,d_c,one,d_h,one));
383
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
3576595478344653636 PetscCall(PetscLogGpuTimeEnd());
384
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
3576595478344653636 PetscCall(PetscLogGpuFlops(1.0*(bv->nc+j)));
385
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
3576595478344653636 PetscCall(VecHIPRestoreArray(bv->buffer,&d_c));
386 3576595478344653636 } else { /* cpu memory */
387 for (i=0;i<bv->nc+j;i++) h[i] += c[i];
388 PetscCall(PetscLogFlops(1.0*(bv->nc+j)));
389 }
390 3576595478344653636 PetscFunctionReturn(PETSC_SUCCESS);
391 3576595478344653636 }
392
393 /*
394 BV_SetValue_HIP - Sets value in row j (counted after the constraints) of column k
395 of the coefficients array
396 */
397 4881539469691333528 PetscErrorCode BV_SetValue_HIP(BV bv,PetscInt j,PetscInt k,PetscScalar *h,PetscScalar value)
398 {
399 4881539469691333528 PetscScalar *d_h,*a;
400
401 PetscFunctionBegin;
402
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
4881539469691333528 if (!h) {
403
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
4881539469691333528 PetscCall(VecHIPGetArray(bv->buffer,&a));
404
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
4881539469691333528 PetscCall(PetscLogGpuTimeBegin());
405 4881539469691333528 d_h = a + k*(bv->nc+bv->m) + bv->nc+j;
406
3/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 1 times.
✓ Branch 7 taken 2 times.
4881539469691333528 PetscCallHIP(hipMemcpy(d_h,&value,sizeof(PetscScalar),hipMemcpyHostToDevice));
407
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
4881539469691333528 PetscCall(PetscLogCpuToGpu(sizeof(PetscScalar)));
408
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
4881539469691333528 PetscCall(PetscLogGpuTimeEnd());
409
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
4881539469691333528 PetscCall(VecHIPRestoreArray(bv->buffer,&a));
410 4881539469691333528 } else { /* cpu memory */
411 h[bv->nc+j] = value;
412 }
413 13565218519712280168 PetscFunctionReturn(PETSC_SUCCESS);
414 1643381548688358520 }
415
416 /*
417 BV_SquareSum_HIP - Returns the value h'*h, where h represents the contents of the
418 coefficients array (up to position j)
419 */
420 3576595478344653636 PetscErrorCode BV_SquareSum_HIP(BV bv,PetscInt j,PetscScalar *h,PetscReal *sum)
421 {
422 3576595478344653636 const PetscScalar *d_h;
423 3576595478344653636 PetscScalar dot;
424 3576595478344653636 PetscInt i;
425 3576595478344653636 PetscHipBLASInt idx=0,one=1;
426 3576595478344653636 hipblasHandle_t hipblashandle;
427
428 PetscFunctionBegin;
429
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
3576595478344653636 if (!h) {
430
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
3576595478344653636 PetscCall(PetscHIPBLASGetHandle(&hipblashandle));
431
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
3576595478344653636 PetscCall(VecHIPGetArrayRead(bv->buffer,&d_h));
432
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
3576595478344653636 PetscCall(PetscHipBLASIntCast(bv->nc+j,&idx));
433
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
3576595478344653636 PetscCall(PetscLogGpuTimeBegin());
434
2/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
3576595478344653636 PetscCallHIPBLAS(hipblasXdot(hipblashandle,idx,d_h,one,d_h,one,&dot));
435
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
3576595478344653636 PetscCall(PetscLogGpuTimeEnd());
436
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
3576595478344653636 PetscCall(PetscLogGpuFlops(2.0*(bv->nc+j)));
437 3576595478344653636 *sum = PetscRealPart(dot);
438
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
3576595478344653636 PetscCall(VecHIPRestoreArrayRead(bv->buffer,&d_h));
439 3576595478344653636 } else { /* cpu memory */
440 *sum = 0.0;
441 for (i=0;i<bv->nc+j;i++) *sum += PetscRealPart(h[i]*PetscConj(h[i]));
442 PetscCall(PetscLogFlops(2.0*(bv->nc+j)));
443 }
444 3576595478344653636 PetscFunctionReturn(PETSC_SUCCESS);
445 3576595478344653636 }
446
447 /* pointwise multiplication */
448 static __global__ void PointwiseMult_kernel(PetscInt xcount,PetscScalar *a,const PetscScalar *b,PetscInt n)
449 {
450 PetscInt x;
451
452 x = xcount*gridDim.x*blockDim.x+blockIdx.x*blockDim.x+threadIdx.x;
453 if (x<n) a[x] *= PetscRealPart(b[x]);
454 }
455
456 /* pointwise division */
457 static __global__ void PointwiseDiv_kernel(PetscInt xcount,PetscScalar *a,const PetscScalar *b,PetscInt n)
458 {
459 PetscInt x;
460
461 x = xcount*gridDim.x*blockDim.x+blockIdx.x*blockDim.x+threadIdx.x;
462 if (x<n) a[x] /= PetscRealPart(b[x]);
463 }
464
465 /*
466 BV_ApplySignature_HIP - Computes the pointwise product h*omega, where h represents
467 the contents of the coefficients array (up to position j) and omega is the signature;
468 if inverse=TRUE then the operation is h/omega
469 */
470 PetscErrorCode BV_ApplySignature_HIP(BV bv,PetscInt j,PetscScalar *h,PetscBool inverse)
471 {
472 PetscScalar *d_h;
473 const PetscScalar *d_omega,*omega;
474 PetscInt i,xcount;
475 dim3 blocks3d, threads3d;
476
477 PetscFunctionBegin;
478 if (!(bv->nc+j)) PetscFunctionReturn(PETSC_SUCCESS);
479 if (!h) {
480 PetscCall(VecHIPGetArray(bv->buffer,&d_h));
481 PetscCall(VecHIPGetArrayRead(bv->omega,&d_omega));
482 PetscCall(SlepcKernelSetGrid1D(bv->nc+j,&blocks3d,&threads3d,&xcount));
483 PetscCall(PetscLogGpuTimeBegin());
484 if (inverse) {
485 for (i=0;i<xcount;i++) PointwiseDiv_kernel<<<blocks3d,threads3d,0,0>>>(i,d_h,d_omega,bv->nc+j);
486 } else {
487 for (i=0;i<xcount;i++) PointwiseMult_kernel<<<blocks3d,threads3d,0,0>>>(i,d_h,d_omega,bv->nc+j);
488 }
489 PetscCallHIP(hipGetLastError());
490 PetscCall(PetscLogGpuTimeEnd());
491 PetscCall(PetscLogGpuFlops(1.0*(bv->nc+j)));
492 PetscCall(VecHIPRestoreArrayRead(bv->omega,&d_omega));
493 PetscCall(VecHIPRestoreArray(bv->buffer,&d_h));
494 } else {
495 PetscCall(VecGetArrayRead(bv->omega,&omega));
496 if (inverse) for (i=0;i<bv->nc+j;i++) h[i] /= PetscRealPart(omega[i]);
497 else for (i=0;i<bv->nc+j;i++) h[i] *= PetscRealPart(omega[i]);
498 PetscCall(VecRestoreArrayRead(bv->omega,&omega));
499 PetscCall(PetscLogFlops(1.0*(bv->nc+j)));
500 }
501 PetscFunctionReturn(PETSC_SUCCESS);
502 }
503
504 /*
505 BV_SquareRoot_HIP - Returns the square root of position j (counted after the constraints)
506 of the coefficients array
507 */
508 3576595478344653636 PetscErrorCode BV_SquareRoot_HIP(BV bv,PetscInt j,PetscScalar *h,PetscReal *beta)
509 {
510 3576595478344653636 const PetscScalar *d_h;
511 3576595478344653636 PetscScalar hh;
512
513 PetscFunctionBegin;
514
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
3576595478344653636 if (!h) {
515
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
3576595478344653636 PetscCall(VecHIPGetArrayRead(bv->buffer,&d_h));
516
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
3576595478344653636 PetscCall(PetscLogGpuTimeBegin());
517
2/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
3576595478344653636 PetscCallHIP(hipMemcpy(&hh,d_h+bv->nc+j,sizeof(PetscScalar),hipMemcpyDeviceToHost));
518
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
3576595478344653636 PetscCall(PetscLogGpuToCpu(sizeof(PetscScalar)));
519
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
3576595478344653636 PetscCall(PetscLogGpuTimeEnd());
520
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
3576595478344653636 PetscCall(BV_SafeSqrt(bv,hh,beta));
521
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
3576595478344653636 PetscCall(VecHIPRestoreArrayRead(bv->buffer,&d_h));
522
0/4
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
3576595478344653636 } else PetscCall(BV_SafeSqrt(bv,h[bv->nc+j],beta));
523 3576595478344653636 PetscFunctionReturn(PETSC_SUCCESS);
524 3576595478344653636 }
525
526 /*
527 BV_StoreCoefficients_HIP - Copy the contents of the coefficients array to an array dest
528 provided by the caller (only values from l to j are copied)
529 */
530 4881532560162684928 PetscErrorCode BV_StoreCoefficients_HIP(BV bv,PetscInt j,PetscScalar *h,PetscScalar *dest)
531 {
532 4881532560162684928 const PetscScalar *d_h,*d_a;
533 4881532560162684928 PetscInt i;
534
535 PetscFunctionBegin;
536
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
4881532560162684928 if (!h) {
537
3/4
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 1 times.
4881532560162684928 PetscCall(VecHIPGetArrayRead(bv->buffer,&d_a));
538
2/4
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 1 times.
4881532560162684928 PetscCall(PetscLogGpuTimeBegin());
539 4881532560162684928 d_h = d_a + j*(bv->nc+bv->m)+bv->nc;
540
3/8
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 1 times.
✓ Branch 7 taken 1 times.
4881532560162684928 PetscCallHIP(hipMemcpy(dest-bv->l,d_h,(j-bv->l)*sizeof(PetscScalar),hipMemcpyDeviceToHost));
541
3/4
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 1 times.
4881532560162684928 PetscCall(PetscLogGpuToCpu((j-bv->l)*sizeof(PetscScalar)));
542
3/4
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 1 times.
4881532560162684928 PetscCall(PetscLogGpuTimeEnd());
543
2/4
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 1 times.
4881532560162684928 PetscCall(VecHIPRestoreArrayRead(bv->buffer,&d_a));
544 4881532560162684928 } else {
545 for (i=bv->l;i<j;i++) dest[i-bv->l] = h[bv->nc+i];
546 }
547 13565211513546866688 PetscFunctionReturn(PETSC_SUCCESS);
548 2722760226280308736 }
549