GCC Code Coverage Report


Directory: ./
File: src/sys/classes/bv/impls/hip/bvhip.hip.cpp
Date: 2026-01-12 03:57:26
Exec Total Coverage
Lines: 199 370 53.8%
Functions: 12 16 75.0%
Branches: 270 1108 24.4%

Line Branch Exec Source
1 /*
2 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3 SLEPc - Scalable Library for Eigenvalue Problem Computations
4 Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
5
6 This file is part of SLEPc.
7 SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9 */
10 /*
11 HIP-related code common to several BV impls
12 */
13
14 #include <slepc/private/bvimpl.h>
15 #include <slepccupmblas.h>
16
17 #define BLOCKSIZE 64
18
19 /*
20 C := alpha*A*B + beta*C
21 */
22 PetscErrorCode BVMult_BLAS_HIP(BV,PetscInt m_,PetscInt n_,PetscInt k_,PetscScalar alpha,const PetscScalar *d_A,PetscInt lda_,const PetscScalar *d_B,PetscInt ldb_,PetscScalar beta,PetscScalar *d_C,PetscInt ldc_)
23 {
24 PetscHipBLASInt m=0,n=0,k=0,lda=0,ldb=0,ldc=0;
25 hipblasHandle_t hipblashandle;
26
27 PetscFunctionBegin;
28 PetscCall(PetscHIPBLASGetHandle(&hipblashandle));
29 PetscCall(PetscHipBLASIntCast(m_,&m));
30 PetscCall(PetscHipBLASIntCast(n_,&n));
31 PetscCall(PetscHipBLASIntCast(k_,&k));
32 PetscCall(PetscHipBLASIntCast(lda_,&lda));
33 PetscCall(PetscHipBLASIntCast(ldb_,&ldb));
34 PetscCall(PetscHipBLASIntCast(ldc_,&ldc));
35 PetscCall(PetscLogGpuTimeBegin());
36 PetscCallHIPBLAS(hipblasXgemm(hipblashandle,HIPBLAS_OP_N,HIPBLAS_OP_N,m,n,k,&alpha,d_A,lda,d_B,ldb,&beta,d_C,ldc));
37 PetscCall(PetscLogGpuTimeEnd());
38 PetscCall(PetscLogGpuFlops(2.0*m*n*k));
39 PetscFunctionReturn(PETSC_SUCCESS);
40 }
41
42 /*
43 y := alpha*A*x + beta*y
44 */
45 7852076831199974864 PetscErrorCode BVMultVec_BLAS_HIP(BV,PetscInt n_,PetscInt k_,PetscScalar alpha,const PetscScalar *d_A,PetscInt lda_,const PetscScalar *d_x,PetscScalar beta,PetscScalar *d_y)
46 {
47 7852076831199974864 PetscHipBLASInt n=0,k=0,lda=0,one=1;
48 7852076831199974864 hipblasHandle_t hipblashandle;
49
50 PetscFunctionBegin;
51
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
7852076831199974864 PetscCall(PetscHIPBLASGetHandle(&hipblashandle));
52
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
7852076831199974864 PetscCall(PetscHipBLASIntCast(n_,&n));
53
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
7852076831199974864 PetscCall(PetscHipBLASIntCast(k_,&k));
54
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
7852076831199974864 PetscCall(PetscHipBLASIntCast(lda_,&lda));
55
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
7852076831199974864 PetscCall(PetscLogGpuTimeBegin());
56
3/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 1 times.
✓ Branch 7 taken 2 times.
7852076831199974864 PetscCallHIPBLAS(hipblasXgemv(hipblashandle,HIPBLAS_OP_N,n,k,&alpha,d_A,lda,d_x,one,&beta,d_y,one));
57
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
7852076831199974864 PetscCall(PetscLogGpuTimeEnd());
58
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
7852076831199974864 PetscCall(PetscLogGpuFlops(2.0*n*k));
59 7852076831199974864 PetscFunctionReturn(PETSC_SUCCESS);
60 13616647521131388880 }
61
62 /*
63 A(:,s:e-1) := A*B(:,s:e-1)
64 */
65 9736782703612657664 PetscErrorCode BVMultInPlace_BLAS_HIP(BV,PetscInt m_,PetscInt k_,PetscInt s,PetscInt e,PetscScalar *d_A,PetscInt lda_,const PetscScalar *d_B,PetscInt ldb_,PetscBool btrans)
66 {
67 9736782703612657664 const PetscScalar *d_B1;
68 9736782703612657664 PetscScalar *d_work,sone=1.0,szero=0.0;
69 9736782703612657664 PetscHipBLASInt m=0,n=0,k=0,l=0,lda=0,ldb=0,bs=BLOCKSIZE;
70 9736782703612657664 size_t freemem,totmem;
71 9736782703612657664 hipblasHandle_t hipblashandle;
72 9736782703612657664 hipblasOperation_t bt;
73
74 PetscFunctionBegin;
75
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
9736782703612657664 PetscCall(PetscHIPBLASGetHandle(&hipblashandle));
76
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
9736782703612657664 PetscCall(PetscHipBLASIntCast(m_,&m));
77
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
9736782703612657664 PetscCall(PetscHipBLASIntCast(e-s,&n));
78
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
9736782703612657664 PetscCall(PetscHipBLASIntCast(k_,&k));
79
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
9736782703612657664 PetscCall(PetscHipBLASIntCast(lda_,&lda));
80
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
9736782703612657664 PetscCall(PetscHipBLASIntCast(ldb_,&ldb));
81
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
9736782703612657664 PetscCall(PetscLogGpuTimeBegin());
82
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
9736782703612657664 if (PetscUnlikely(btrans)) {
83 d_B1 = d_B+s;
84 bt = HIPBLAS_OP_C;
85 } else {
86 9736782703612657664 d_B1 = d_B+s*ldb;
87 9736782703612657664 bt = HIPBLAS_OP_N;
88 }
89 /* try to allocate the whole matrix */
90
2/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
14348468722040045568 PetscCallHIP(hipMemGetInfo(&freemem,&totmem));
91
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
9736782703612657664 if (freemem>=lda*n*sizeof(PetscScalar)) {
92
3/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 1 times.
✓ Branch 7 taken 2 times.
9736782703612657664 PetscCallHIP(hipMalloc((void**)&d_work,lda*n*sizeof(PetscScalar)));
93
3/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 1 times.
✓ Branch 7 taken 2 times.
9736782703612657664 PetscCallHIPBLAS(hipblasXgemm(hipblashandle,HIPBLAS_OP_N,bt,m,n,k,&sone,d_A,lda,d_B1,ldb,&szero,d_work,lda));
94
3/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 1 times.
✓ Branch 7 taken 2 times.
9736782703612657664 PetscCallHIP(hipMemcpy2D(d_A+s*lda,lda*sizeof(PetscScalar),d_work,lda*sizeof(PetscScalar),m*sizeof(PetscScalar),n,hipMemcpyDeviceToDevice));
95 9736782703612657664 } else {
96 PetscCall(PetscHipBLASIntCast(freemem/(m*sizeof(PetscScalar)),&bs));
97 PetscCallHIP(hipMalloc((void**)&d_work,bs*n*sizeof(PetscScalar)));
98 PetscCall(PetscHipBLASIntCast(m % bs,&l));
99 if (l) {
100 PetscCallHIPBLAS(hipblasXgemm(hipblashandle,HIPBLAS_OP_N,bt,l,n,k,&sone,d_A,lda,d_B1,ldb,&szero,d_work,l));
101 PetscCallHIP(hipMemcpy2D(d_A+s*lda,lda*sizeof(PetscScalar),d_work,l*sizeof(PetscScalar),l*sizeof(PetscScalar),n,hipMemcpyDeviceToDevice));
102 }
103 for (;l<m;l+=bs) {
104 PetscCallHIPBLAS(hipblasXgemm(hipblashandle,HIPBLAS_OP_N,bt,bs,n,k,&sone,d_A+l,lda,d_B1,ldb,&szero,d_work,bs));
105 PetscCallHIP(hipMemcpy2D(d_A+l+s*lda,lda*sizeof(PetscScalar),d_work,bs*sizeof(PetscScalar),bs*sizeof(PetscScalar),n,hipMemcpyDeviceToDevice));
106 }
107 }
108
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
14348468722040045568 PetscCall(PetscLogGpuTimeEnd());
109
3/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 1 times.
✓ Branch 7 taken 2 times.
9736782703612657664 PetscCallHIP(hipFree(d_work));
110
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
9736782703612657664 PetscCall(PetscLogGpuFlops(2.0*m*n*k));
111 9736782703612657664 PetscFunctionReturn(PETSC_SUCCESS);
112 14348468722040045568 }
113
114 /*
115 B := alpha*A + beta*B
116 */
117 PetscErrorCode BVAXPY_BLAS_HIP(BV,PetscInt n_,PetscInt k_,PetscScalar alpha,const PetscScalar *d_A,PetscInt lda_,PetscScalar beta,PetscScalar *d_B,PetscInt ldb_)
118 {
119 PetscHipBLASInt n=0,k=0,lda=0,ldb=0;
120 hipblasHandle_t hipblashandle;
121
122 PetscFunctionBegin;
123 PetscCall(PetscHIPBLASGetHandle(&hipblashandle));
124 PetscCall(PetscHipBLASIntCast(n_,&n));
125 PetscCall(PetscHipBLASIntCast(k_,&k));
126 PetscCall(PetscHipBLASIntCast(lda_,&lda));
127 PetscCall(PetscHipBLASIntCast(ldb_,&ldb));
128 PetscCall(PetscLogGpuTimeBegin());
129 PetscCallHIPBLAS(hipblasXgeam(hipblashandle,HIPBLAS_OP_N,HIPBLAS_OP_N,n,k,&alpha,d_A,lda,&beta,d_B,ldb,d_B,ldb));
130 PetscCall(PetscLogGpuTimeEnd());
131 PetscCall(PetscLogGpuFlops((beta==(PetscScalar)1.0)?2.0*n*k:3.0*n*k));
132 PetscFunctionReturn(PETSC_SUCCESS);
133 }
134
135 /*
136 C := A'*B
137
138 C is a CPU array
139 */
140 PetscErrorCode BVDot_BLAS_HIP(BV bv,PetscInt m_,PetscInt n_,PetscInt k_,const PetscScalar *d_A,PetscInt lda_,const PetscScalar *d_B,PetscInt ldb_,PetscScalar *C,PetscInt ldc_,PetscBool mpi)
141 {
142 PetscScalar *d_work,sone=1.0,szero=0.0,*CC;
143 PetscInt j;
144 PetscHipBLASInt m=0,n=0,k=0,lda=0,ldb=0,ldc=0;
145 PetscMPIInt len;
146 hipblasHandle_t hipblashandle;
147
148 PetscFunctionBegin;
149 PetscCall(PetscHIPBLASGetHandle(&hipblashandle));
150 PetscCall(PetscHipBLASIntCast(m_,&m));
151 PetscCall(PetscHipBLASIntCast(n_,&n));
152 PetscCall(PetscHipBLASIntCast(k_,&k));
153 PetscCall(PetscHipBLASIntCast(lda_,&lda));
154 PetscCall(PetscHipBLASIntCast(ldb_,&ldb));
155 PetscCall(PetscHipBLASIntCast(ldc_,&ldc));
156 PetscCallHIP(hipMalloc((void**)&d_work,m*n*sizeof(PetscScalar)));
157 if (mpi) {
158 if (ldc==m) {
159 PetscCall(BVAllocateWork_Private(bv,m*n));
160 if (k) {
161 PetscCall(PetscLogGpuTimeBegin());
162 PetscCallHIPBLAS(hipblasXgemm(hipblashandle,HIPBLAS_OP_C,HIPBLAS_OP_N,m,n,k,&sone,d_A,lda,d_B,ldb,&szero,d_work,ldc));
163 PetscCall(PetscLogGpuTimeEnd());
164 PetscCallHIP(hipMemcpy(bv->work,d_work,m*n*sizeof(PetscScalar),hipMemcpyDeviceToHost));
165 PetscCall(PetscLogGpuToCpu(m*n*sizeof(PetscScalar)));
166 } else PetscCall(PetscArrayzero(bv->work,m*n));
167 PetscCall(PetscMPIIntCast(m*n,&len));
168 PetscCallMPI(MPIU_Allreduce(bv->work,C,len,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)bv)));
169 } else {
170 PetscCall(BVAllocateWork_Private(bv,2*m*n));
171 CC = bv->work+m*n;
172 if (k) {
173 PetscCall(PetscLogGpuTimeBegin());
174 PetscCallHIPBLAS(hipblasXgemm(hipblashandle,HIPBLAS_OP_C,HIPBLAS_OP_N,m,n,k,&sone,d_A,lda,d_B,ldb,&szero,d_work,m));
175 PetscCall(PetscLogGpuTimeEnd());
176 PetscCallHIP(hipMemcpy(bv->work,d_work,m*n*sizeof(PetscScalar),hipMemcpyDeviceToHost));
177 PetscCall(PetscLogGpuToCpu(m*n*sizeof(PetscScalar)));
178 } else PetscCall(PetscArrayzero(bv->work,m*n));
179 PetscCall(PetscMPIIntCast(m*n,&len));
180 PetscCallMPI(MPIU_Allreduce(bv->work,CC,len,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)bv)));
181 for (j=0;j<n;j++) PetscCall(PetscArraycpy(C+j*ldc,CC+j*m,m));
182 }
183 } else {
184 if (k) {
185 PetscCall(BVAllocateWork_Private(bv,m*n));
186 PetscCall(PetscLogGpuTimeBegin());
187 PetscCallHIPBLAS(hipblasXgemm(hipblashandle,HIPBLAS_OP_C,HIPBLAS_OP_N,m,n,k,&sone,d_A,lda,d_B,ldb,&szero,d_work,m));
188 PetscCall(PetscLogGpuTimeEnd());
189 PetscCallHIP(hipMemcpy(bv->work,d_work,m*n*sizeof(PetscScalar),hipMemcpyDeviceToHost));
190 PetscCall(PetscLogGpuToCpu(m*n*sizeof(PetscScalar)));
191 for (j=0;j<n;j++) PetscCall(PetscArraycpy(C+j*ldc,bv->work+j*m,m));
192 }
193 }
194 PetscCallHIP(hipFree(d_work));
195 PetscCall(PetscLogGpuFlops(2.0*m*n*k));
196 PetscFunctionReturn(PETSC_SUCCESS);
197 }
198
199 /*
200 y := A'*x
201
202 y is a CPU array, if NULL bv->buffer is used as a workspace
203 */
204 17444697849731208836 PetscErrorCode BVDotVec_BLAS_HIP(BV bv,PetscInt n_,PetscInt k_,const PetscScalar *d_A,PetscInt lda_,const PetscScalar *d_x,PetscScalar *y,PetscBool mpi)
205 {
206 17444697849731208836 PetscScalar *d_work,szero=0.0,sone=1.0,*yy;
207 17444697849731208836 PetscHipBLASInt n=0,k=0,lda=0,one=1;
208 17444697849731208836 PetscMPIInt len;
209 17444697849731208836 hipblasHandle_t hipblashandle;
210
211 PetscFunctionBegin;
212
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
17444697849731208836 PetscCall(PetscHIPBLASGetHandle(&hipblashandle));
213
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
17444697849731208836 PetscCall(PetscHipBLASIntCast(n_,&n));
214
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
17444697849731208836 PetscCall(PetscHipBLASIntCast(k_,&k));
215
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
17444697849731208836 PetscCall(PetscHipBLASIntCast(lda_,&lda));
216
4/6
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✓ Branch 5 taken 2 times.
17444697849731208836 if (!y) PetscCall(VecHIPGetArrayWrite(bv->buffer,&d_work));
217 else PetscCallHIP(hipMalloc((void**)&d_work,k*sizeof(PetscScalar)));
218
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
19750551208742389636 if (mpi) {
219 PetscCall(BVAllocateWork_Private(bv,k));
220 if (n) {
221 PetscCall(PetscLogGpuTimeBegin());
222 PetscCallHIPBLAS(hipblasXgemv(hipblashandle,HIPBLAS_OP_C,n,k,&sone,d_A,lda,d_x,one,&szero,d_work,one));
223 PetscCall(PetscLogGpuTimeEnd());
224 PetscCallHIP(hipMemcpy(bv->work,d_work,k*sizeof(PetscScalar),hipMemcpyDeviceToHost));
225 PetscCall(PetscLogGpuToCpu(k*sizeof(PetscScalar)));
226 } else PetscCall(PetscArrayzero(bv->work,k));
227 /* reduction */
228 PetscCall(PetscMPIIntCast(k,&len));
229 if (!y) {
230 if (use_gpu_aware_mpi) { /* case 1: reduce on GPU using a temporary buffer */
231 PetscCallHIP(hipMalloc((void**)&yy,k*sizeof(PetscScalar)));
232 PetscCallMPI(MPIU_Allreduce(d_work,yy,len,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)bv)));
233 PetscCallHIP(hipMemcpy(d_work,yy,k*sizeof(PetscScalar),hipMemcpyDeviceToDevice));
234 PetscCallHIP(hipFree(yy));
235 } else { /* case 2: reduce on CPU, copy result back to GPU */
236 PetscCall(BVAllocateWork_Private(bv,2*k));
237 yy = bv->work+k;
238 PetscCallHIP(hipMemcpy(bv->work,d_work,k*sizeof(PetscScalar),hipMemcpyDeviceToHost));
239 PetscCall(PetscLogGpuToCpu(k*sizeof(PetscScalar)));
240 PetscCallMPI(MPIU_Allreduce(bv->work,yy,len,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)bv)));
241 PetscCallHIP(hipMemcpy(d_work,yy,k*sizeof(PetscScalar),hipMemcpyHostToDevice));
242 PetscCall(PetscLogCpuToGpu(k*sizeof(PetscScalar)));
243 }
244 PetscCall(VecHIPRestoreArrayWrite(bv->buffer,&d_work));
245 } else { /* case 3: user-provided array y, reduce on CPU */
246 PetscCallHIP(hipFree(d_work));
247 PetscCallMPI(MPIU_Allreduce(bv->work,y,len,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)bv)));
248 }
249 } else {
250
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 2 times.
17444697849731208836 if (n) {
251
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
17444697849731208836 PetscCall(PetscLogGpuTimeBegin());
252
3/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 1 times.
✓ Branch 7 taken 2 times.
17444697849731208836 PetscCallHIPBLAS(hipblasXgemv(hipblashandle,HIPBLAS_OP_C,n,k,&sone,d_A,lda,d_x,one,&szero,d_work,one));
253
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
17444697849731208836 PetscCall(PetscLogGpuTimeEnd());
254 17444697849731208836 }
255
4/6
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✓ Branch 5 taken 2 times.
19750551208742389636 if (!y) PetscCall(VecHIPRestoreArrayWrite(bv->buffer,&d_work));
256 else {
257 PetscCallHIP(hipMemcpy(y,d_work,k*sizeof(PetscScalar),hipMemcpyDeviceToHost));
258 PetscCall(PetscLogGpuToCpu(k*sizeof(PetscScalar)));
259 PetscCallHIP(hipFree(d_work));
260 }
261 }
262
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
17444697849731208836 PetscCall(PetscLogGpuFlops(2.0*n*k));
263 17444697849731208836 PetscFunctionReturn(PETSC_SUCCESS);
264 11680147250583151620 }
265
266 /*
267 Scale n scalars
268 */
269 10813140313896392140 PetscErrorCode BVScale_BLAS_HIP(BV,PetscInt n_,PetscScalar *d_A,PetscScalar alpha)
270 {
271 10813140313896392140 PetscHipBLASInt n=0,one=1;
272 10813140313896392140 hipblasHandle_t hipblashandle;
273
274 PetscFunctionBegin;
275
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
10813140313896392140 PetscCall(PetscHipBLASIntCast(n_,&n));
276
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
10813140313896392140 if (PetscUnlikely(alpha == (PetscScalar)0.0)) PetscCallHIP(hipMemset(d_A,0,n*sizeof(PetscScalar)));
277
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
10813140313896392140 else if (alpha != (PetscScalar)1.0) {
278
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
10813140313896392140 PetscCall(PetscHIPBLASGetHandle(&hipblashandle));
279
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
10813140313896392140 PetscCall(PetscLogGpuTimeBegin());
280
3/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 1 times.
✓ Branch 7 taken 2 times.
10813140313896392140 PetscCallHIPBLAS(hipblasXscal(hipblashandle,n,&alpha,d_A,one));
281
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
10813140313896392140 PetscCall(PetscLogGpuTimeEnd());
282
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
10813140313896392140 PetscCall(PetscLogGpuFlops(1.0*n));
283 10813140313896392140 }
284 20315742963167664308 PetscFunctionReturn(PETSC_SUCCESS);
285 9137756639397413980 }
286
287 /*
288 Compute 2-norm of vector consisting of n scalars
289 */
290 204 PetscErrorCode BVNorm_BLAS_HIP(BV,PetscInt n_,const PetscScalar *d_A,PetscReal *nrm)
291 {
292 204 PetscHipBLASInt n=0,one=1;
293 204 hipblasHandle_t hipblashandle;
294
295 PetscFunctionBegin;
296
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
204 PetscCall(PetscHipBLASIntCast(n_,&n));
297
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
204 PetscCall(PetscHIPBLASGetHandle(&hipblashandle));
298
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
204 PetscCall(PetscLogGpuTimeBegin());
299
2/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
204 PetscCallHIPBLAS(hipblasXnrm2(hipblashandle,n,d_A,one,nrm));
300
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
204 PetscCall(PetscLogGpuTimeEnd());
301
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
204 PetscCall(PetscLogGpuFlops(2.0*n));
302 204 PetscFunctionReturn(PETSC_SUCCESS);
303 204 }
304
305 /*
306 Normalize the columns of A
307 */
308 3587117180510011392 PetscErrorCode BVNormalize_BLAS_HIP(BV,PetscInt m_,PetscInt n_,PetscScalar *d_A,PetscInt lda_,PetscScalar *eigi)
309 {
310 3587117180510011392 PetscInt j,k;
311 3587117180510011392 PetscReal nrm,nrm1;
312 3587117180510011392 PetscScalar alpha;
313 3587117180510011392 PetscHipBLASInt m=0,one=1;
314 3587117180510011392 hipblasHandle_t hipblashandle;
315
316 PetscFunctionBegin;
317
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
3587117180510011392 PetscCall(PetscHipBLASIntCast(m_,&m));
318
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
3587117180510011392 PetscCall(PetscHIPBLASGetHandle(&hipblashandle));
319
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
3587117180510011392 PetscCall(PetscLogGpuTimeBegin());
320
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
13323899884122669056 for (j=0;j<n_;j++) {
321 9736782703612657664 k = 1;
322 #if !defined(PETSC_USE_COMPLEX)
323
2/4
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
2819253675971575808 if (eigi && eigi[j] != 0.0) k = 2;
324 #endif
325
2/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
9736782703612657664 PetscCallHIPBLAS(hipblasXnrm2(hipblashandle,m,d_A+j*lda_,one,&nrm));
326
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
9736782703612657664 if (k==2) {
327 PetscCallHIPBLAS(hipblasXnrm2(hipblashandle,m,d_A+(j+1)*lda_,one,&nrm1));
328 nrm = SlepcAbs(nrm,nrm1);
329 }
330 9736782703612657664 alpha = 1.0/nrm;
331
3/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 1 times.
✓ Branch 7 taken 2 times.
9736782703612657664 PetscCallHIPBLAS(hipblasXscal(hipblashandle,m,&alpha,d_A+j*lda_,one));
332
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
9736782703612657664 if (k==2) {
333 PetscCallHIPBLAS(hipblasXscal(hipblashandle,m,&alpha,d_A+(j+1)*lda_,one));
334 j++;
335 }
336 9736782703612657664 }
337
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
3587117180510011392 PetscCall(PetscLogGpuTimeEnd());
338
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
3587117180510011392 PetscCall(PetscLogGpuFlops(3.0*m*n_));
339 3587117180510011392 PetscFunctionReturn(PETSC_SUCCESS);
340 8198803198937399296 }
341
342 /*
343 BV_CleanCoefficients_HIP - Sets to zero all entries of column j of the bv buffer
344 */
345 10813140292266366232 PetscErrorCode BV_CleanCoefficients_HIP(BV bv,PetscInt j,PetscScalar *h)
346 {
347 10813140292266366232 PetscScalar *d_hh,*d_a;
348 10813140292266366232 PetscInt i;
349
350 PetscFunctionBegin;
351
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
10813140292266366232 if (!h) {
352
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
10813140292266366232 PetscCall(VecHIPGetArray(bv->buffer,&d_a));
353
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
10813140292266366232 PetscCall(PetscLogGpuTimeBegin());
354 10813140292266366232 d_hh = d_a + j*(bv->nc+bv->m);
355
3/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 1 times.
✓ Branch 7 taken 2 times.
10813140292266366232 PetscCallHIP(hipMemset(d_hh,0,(bv->nc+j)*sizeof(PetscScalar)));
356
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
10813140292266366232 PetscCall(PetscLogGpuTimeEnd());
357
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
10813140292266366232 PetscCall(VecHIPRestoreArray(bv->buffer,&d_a));
358 10813140292266366232 } else { /* cpu memory */
359 for (i=0;i<bv->nc+j;i++) h[i] = 0.0;
360 }
361 20315742941311145960 PetscFunctionReturn(PETSC_SUCCESS);
362 9696217843506350552 }
363
364 /*
365 BV_AddCoefficients_HIP - Add the contents of the scratch (0-th column) of the bv buffer
366 into column j of the bv buffer
367 */
368 17444697849731208836 PetscErrorCode BV_AddCoefficients_HIP(BV bv,PetscInt j,PetscScalar *h,PetscScalar *c)
369 {
370 17444697849731208836 PetscScalar *d_h,*d_c,sone=1.0;
371 17444697849731208836 PetscInt i;
372 17444697849731208836 PetscHipBLASInt idx=0,one=1;
373 17444697849731208836 hipblasHandle_t hipblashandle;
374
375 PetscFunctionBegin;
376
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
17444697849731208836 if (!h) {
377
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
17444697849731208836 PetscCall(PetscHIPBLASGetHandle(&hipblashandle));
378
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
17444697849731208836 PetscCall(VecHIPGetArray(bv->buffer,&d_c));
379 17444697849731208836 d_h = d_c + j*(bv->nc+bv->m);
380
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
17444697849731208836 PetscCall(PetscHipBLASIntCast(bv->nc+j,&idx));
381
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
17444697849731208836 PetscCall(PetscLogGpuTimeBegin());
382
3/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 1 times.
✓ Branch 7 taken 2 times.
17444697849731208836 PetscCallHIPBLAS(hipblasXaxpy(hipblashandle,idx,&sone,d_c,one,d_h,one));
383
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
17444697849731208836 PetscCall(PetscLogGpuTimeEnd());
384
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
17444697849731208836 PetscCall(PetscLogGpuFlops(1.0*(bv->nc+j)));
385
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
17444697849731208836 PetscCall(VecHIPRestoreArray(bv->buffer,&d_c));
386 17444697849731208836 } else { /* cpu memory */
387 for (i=0;i<bv->nc+j;i++) h[i] += c[i];
388 PetscCall(PetscLogFlops(1.0*(bv->nc+j)));
389 }
390 18597624529236799236 PetscFunctionReturn(PETSC_SUCCESS);
391 22056404567753570436 }
392
393 /*
394 BV_SetValue_HIP - Sets value in row j (counted after the constraints) of column k
395 of the coefficients array
396 */
397 10813140292266366232 PetscErrorCode BV_SetValue_HIP(BV bv,PetscInt j,PetscInt k,PetscScalar *h,PetscScalar value)
398 {
399 10813140292266366232 PetscScalar *d_h,*a;
400
401 PetscFunctionBegin;
402
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
10813140292266366232 if (!h) {
403
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
10813140292266366232 PetscCall(VecHIPGetArray(bv->buffer,&a));
404
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
10813140292266366232 PetscCall(PetscLogGpuTimeBegin());
405 10813140292266366232 d_h = a + k*(bv->nc+bv->m) + bv->nc+j;
406
3/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 1 times.
✓ Branch 7 taken 2 times.
10813140292266366232 PetscCallHIP(hipMemcpy(d_h,&value,sizeof(PetscScalar),hipMemcpyHostToDevice));
407
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
10813140292266366232 PetscCall(PetscLogCpuToGpu(sizeof(PetscScalar)));
408
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
10813140292266366232 PetscCall(PetscLogGpuTimeEnd());
409
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
10813140292266366232 PetscCall(VecHIPRestoreArray(bv->buffer,&a));
410 10813140292266366232 } else { /* cpu memory */
411 h[bv->nc+j] = value;
412 }
413 20315742941311145960 PetscFunctionReturn(PETSC_SUCCESS);
414 9137756619126342712 }
415
416 /*
417 BV_SquareSum_HIP - Returns the value h'*h, where h represents the contents of the
418 coefficients array (up to position j)
419 */
420 17444697849731208836 PetscErrorCode BV_SquareSum_HIP(BV bv,PetscInt j,PetscScalar *h,PetscReal *sum)
421 {
422 17444697849731208836 const PetscScalar *d_h;
423 17444697849731208836 PetscScalar dot;
424 17444697849731208836 PetscInt i;
425 17444697849731208836 PetscHipBLASInt idx=0,one=1;
426 17444697849731208836 hipblasHandle_t hipblashandle;
427
428 PetscFunctionBegin;
429
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
17444697849731208836 if (!h) {
430
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
17444697849731208836 PetscCall(PetscHIPBLASGetHandle(&hipblashandle));
431
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
17444697849731208836 PetscCall(VecHIPGetArrayRead(bv->buffer,&d_h));
432
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
17444697849731208836 PetscCall(PetscHipBLASIntCast(bv->nc+j,&idx));
433
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
17444697849731208836 PetscCall(PetscLogGpuTimeBegin());
434
3/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 1 times.
✓ Branch 7 taken 2 times.
17444697849731208836 PetscCallHIPBLAS(hipblasXdot(hipblashandle,idx,d_h,one,d_h,one,&dot));
435
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
17444697849731208836 PetscCall(PetscLogGpuTimeEnd());
436
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
17444697849731208836 PetscCall(PetscLogGpuFlops(2.0*(bv->nc+j)));
437 17444697849731208836 *sum = PetscRealPart(dot);
438
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
17444697849731208836 PetscCall(VecHIPRestoreArrayRead(bv->buffer,&d_h));
439 17444697849731208836 } else { /* cpu memory */
440 *sum = 0.0;
441 for (i=0;i<bv->nc+j;i++) *sum += PetscRealPart(h[i]*PetscConj(h[i]));
442 PetscCall(PetscLogFlops(2.0*(bv->nc+j)));
443 }
444 18597624529236799236 PetscFunctionReturn(PETSC_SUCCESS);
445 22056404567753570436 }
446
447 /* pointwise multiplication */
448 static __global__ void PointwiseMult_kernel(PetscInt xcount,PetscScalar *a,const PetscScalar *b,PetscInt n)
449 {
450 PetscInt x;
451
452 x = xcount*gridDim.x*blockDim.x+blockIdx.x*blockDim.x+threadIdx.x;
453 if (x<n) a[x] *= PetscRealPart(b[x]);
454 }
455
456 /* pointwise division */
457 static __global__ void PointwiseDiv_kernel(PetscInt xcount,PetscScalar *a,const PetscScalar *b,PetscInt n)
458 {
459 PetscInt x;
460
461 x = xcount*gridDim.x*blockDim.x+blockIdx.x*blockDim.x+threadIdx.x;
462 if (x<n) a[x] /= PetscRealPart(b[x]);
463 }
464
465 /*
466 BV_ApplySignature_HIP - Computes the pointwise product h*omega, where h represents
467 the contents of the coefficients array (up to position j) and omega is the signature;
468 if inverse=TRUE then the operation is h/omega
469 */
470 PetscErrorCode BV_ApplySignature_HIP(BV bv,PetscInt j,PetscScalar *h,PetscBool inverse)
471 {
472 PetscScalar *d_h;
473 const PetscScalar *d_omega,*omega;
474 PetscInt i,xcount;
475 dim3 blocks3d, threads3d;
476
477 PetscFunctionBegin;
478 if (!(bv->nc+j)) PetscFunctionReturn(PETSC_SUCCESS);
479 if (!h) {
480 PetscCall(VecHIPGetArray(bv->buffer,&d_h));
481 PetscCall(VecHIPGetArrayRead(bv->omega,&d_omega));
482 PetscCall(SlepcKernelSetGrid1D(bv->nc+j,&blocks3d,&threads3d,&xcount));
483 PetscCall(PetscLogGpuTimeBegin());
484 if (inverse) {
485 for (i=0;i<xcount;i++) PointwiseDiv_kernel<<<blocks3d,threads3d,0,0>>>(i,d_h,d_omega,bv->nc+j);
486 } else {
487 for (i=0;i<xcount;i++) PointwiseMult_kernel<<<blocks3d,threads3d,0,0>>>(i,d_h,d_omega,bv->nc+j);
488 }
489 PetscCallHIP(hipGetLastError());
490 PetscCall(PetscLogGpuTimeEnd());
491 PetscCall(PetscLogGpuFlops(1.0*(bv->nc+j)));
492 PetscCall(VecHIPRestoreArrayRead(bv->omega,&d_omega));
493 PetscCall(VecHIPRestoreArray(bv->buffer,&d_h));
494 } else {
495 PetscCall(VecGetArrayRead(bv->omega,&omega));
496 if (inverse) for (i=0;i<bv->nc+j;i++) h[i] /= PetscRealPart(omega[i]);
497 else for (i=0;i<bv->nc+j;i++) h[i] *= PetscRealPart(omega[i]);
498 PetscCall(VecRestoreArrayRead(bv->omega,&omega));
499 PetscCall(PetscLogFlops(1.0*(bv->nc+j)));
500 }
501 PetscFunctionReturn(PETSC_SUCCESS);
502 }
503
504 /*
505 BV_SquareRoot_HIP - Returns the square root of position j (counted after the constraints)
506 of the coefficients array
507 */
508 17444697849731208836 PetscErrorCode BV_SquareRoot_HIP(BV bv,PetscInt j,PetscScalar *h,PetscReal *beta)
509 {
510 17444697849731208836 const PetscScalar *d_h;
511 17444697849731208836 PetscScalar hh;
512
513 PetscFunctionBegin;
514
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
17444697849731208836 if (!h) {
515
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
17444697849731208836 PetscCall(VecHIPGetArrayRead(bv->buffer,&d_h));
516
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
17444697849731208836 PetscCall(PetscLogGpuTimeBegin());
517
3/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 1 times.
✓ Branch 7 taken 2 times.
17444697849731208836 PetscCallHIP(hipMemcpy(&hh,d_h+bv->nc+j,sizeof(PetscScalar),hipMemcpyDeviceToHost));
518
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
17444697849731208836 PetscCall(PetscLogGpuToCpu(sizeof(PetscScalar)));
519
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
17444697849731208836 PetscCall(PetscLogGpuTimeEnd());
520
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
17444697849731208836 PetscCall(BV_SafeSqrt(bv,hh,beta));
521
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
17444697849731208836 PetscCall(VecHIPRestoreArrayRead(bv->buffer,&d_h));
522
0/4
✗ Branch 0 not taken.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
17444697849731208836 } else PetscCall(BV_SafeSqrt(bv,h[bv->nc+j],beta));
523 18597624529236799236 PetscFunctionReturn(PETSC_SUCCESS);
524 25515184606270341636 }
525
526 /*
527 BV_StoreCoefficients_HIP - Copy the contents of the coefficients array to an array dest
528 provided by the caller (only values from l to j are copied)
529 */
530 10813138994464817152 PetscErrorCode BV_StoreCoefficients_HIP(BV bv,PetscInt j,PetscScalar *h,PetscScalar *dest)
531 {
532 10813138994464817152 const PetscScalar *d_h,*d_a;
533 10813138994464817152 PetscInt i;
534
535 PetscFunctionBegin;
536
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
10813138994464817152 if (!h) {
537
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
10813138994464817152 PetscCall(VecHIPGetArrayRead(bv->buffer,&d_a));
538
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
10813138994464817152 PetscCall(PetscLogGpuTimeBegin());
539 10813138994464817152 d_h = d_a + j*(bv->nc+bv->m)+bv->nc;
540
3/8
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 1 times.
✓ Branch 7 taken 2 times.
10813138994464817152 PetscCallHIP(hipMemcpy(dest-bv->l,d_h,(j-bv->l)*sizeof(PetscScalar),hipMemcpyDeviceToHost));
541
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
10813138994464817152 PetscCall(PetscLogGpuToCpu((j-bv->l)*sizeof(PetscScalar)));
542
3/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 2 times.
10813138994464817152 PetscCall(PetscLogGpuTimeEnd());
543
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
10813138994464817152 PetscCall(VecHIPRestoreArrayRead(bv->buffer,&d_a));
544 10813138994464817152 } else {
545 for (i=bv->l;i<j;i++) dest[i-bv->l] = h[bv->nc+i];
546 }
547 20315741629920051200 PetscFunctionReturn(PETSC_SUCCESS);
548 9696216600062984192 }
549