Line | Branch | Exec | Source |
---|---|---|---|
1 | 3553 | /* | |
2 | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
3 | SLEPc - Scalable Library for Eigenvalue Problem Computations | ||
4 | Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain | ||
5 | |||
6 | This file is part of SLEPc. | ||
7 | SLEPc is distributed under a 2-clause BSD license (see LICENSE). | ||
8 | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
9 | */ | ||
10 | /* | ||
11 | SLEPc singular value solver: "cyclic" (CUDA implementation) | ||
12 | */ | ||
13 | #include <slepc/private/svdimpl.h> | ||
14 | #include "../src/svd/impls/cyclic/cyclic.h" | ||
15 | |||
16 | 348 | PetscErrorCode MatMult_Cyclic_CUDA(Mat B,Vec x,Vec y) | |
17 | { | ||
18 | 348 | SVD_CYCLIC_SHELL *ctx; | |
19 | 348 | const PetscScalar *d_px; | |
20 | 348 | PetscScalar *d_py; | |
21 | 348 | PetscInt m; | |
22 | |||
23 | 348 | PetscFunctionBegin; | |
24 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
348 | PetscCall(MatShellGetContext(B,&ctx)); |
25 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
348 | PetscCall(MatGetLocalSize(ctx->A,&m,NULL)); |
26 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
348 | PetscCall(VecCUDAGetArrayRead(x,&d_px)); |
27 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
348 | PetscCall(VecCUDAGetArrayWrite(y,&d_py)); |
28 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
348 | PetscCall(VecCUDAPlaceArray(ctx->x1,d_px)); |
29 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
348 | PetscCall(VecCUDAPlaceArray(ctx->x2,d_px+m)); |
30 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
348 | PetscCall(VecCUDAPlaceArray(ctx->y1,d_py)); |
31 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
348 | PetscCall(VecCUDAPlaceArray(ctx->y2,d_py+m)); |
32 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
348 | if (!ctx->misaligned) { |
33 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
175 | PetscCall(MatMult(ctx->A,ctx->x2,ctx->y1)); |
34 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
175 | PetscCall(MatMult(ctx->AT,ctx->x1,ctx->y2)); |
35 | } else { /* prevent CUDA errors when bottom part is misaligned */ | ||
36 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
173 | PetscCall(VecCopy(ctx->x2,ctx->wx2)); |
37 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
173 | PetscCall(MatMult(ctx->A,ctx->wx2,ctx->y1)); |
38 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
173 | PetscCall(MatMult(ctx->AT,ctx->x1,ctx->wy2)); |
39 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
173 | PetscCall(VecCopy(ctx->wy2,ctx->y2)); |
40 | } | ||
41 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
348 | PetscCall(VecCUDAResetArray(ctx->x1)); |
42 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
348 | PetscCall(VecCUDAResetArray(ctx->x2)); |
43 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
348 | PetscCall(VecCUDAResetArray(ctx->y1)); |
44 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
348 | PetscCall(VecCUDAResetArray(ctx->y2)); |
45 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
348 | PetscCall(VecCUDARestoreArrayRead(x,&d_px)); |
46 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
348 | PetscCall(VecCUDARestoreArrayWrite(y,&d_py)); |
47 | PetscFunctionReturn(PETSC_SUCCESS); | ||
48 | } | ||
49 | |||
50 | 4861 | PetscErrorCode MatMult_ECross_CUDA(Mat B,Vec x,Vec y) | |
51 | { | ||
52 | 4861 | SVD_CYCLIC_SHELL *ctx; | |
53 | 4861 | const PetscScalar *d_px; | |
54 | 4861 | PetscScalar *d_py; | |
55 | 4861 | PetscInt mn,m,n; | |
56 | |||
57 | 4861 | PetscFunctionBegin; | |
58 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
4861 | PetscCall(MatShellGetContext(B,&ctx)); |
59 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
4861 | PetscCall(MatGetLocalSize(ctx->A,NULL,&n)); |
60 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
4861 | PetscCall(VecGetLocalSize(y,&mn)); |
61 | 4861 | m = mn-n; | |
62 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
4861 | PetscCall(VecCUDAGetArrayRead(x,&d_px)); |
63 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
4861 | PetscCall(VecCUDAGetArrayWrite(y,&d_py)); |
64 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
4861 | PetscCall(VecCUDAPlaceArray(ctx->x1,d_px)); |
65 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
4861 | PetscCall(VecCUDAPlaceArray(ctx->x2,d_px+m)); |
66 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
4861 | PetscCall(VecCUDAPlaceArray(ctx->y1,d_py)); |
67 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
4861 | PetscCall(VecCUDAPlaceArray(ctx->y2,d_py+m)); |
68 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
4861 | PetscCall(VecCopy(ctx->x1,ctx->y1)); |
69 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
4861 | if (!ctx->misaligned) { |
70 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
2453 | PetscCall(MatMult(ctx->A,ctx->x2,ctx->w)); |
71 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
2453 | PetscCall(MatMult(ctx->AT,ctx->w,ctx->y2)); |
72 | } else { /* prevent CUDA errors when bottom part is misaligned */ | ||
73 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
2408 | PetscCall(VecCopy(ctx->x2,ctx->wx2)); |
74 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
2408 | PetscCall(MatMult(ctx->A,ctx->wx2,ctx->w)); |
75 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
2408 | PetscCall(MatMult(ctx->AT,ctx->w,ctx->wy2)); |
76 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
2408 | PetscCall(VecCopy(ctx->wy2,ctx->y2)); |
77 | } | ||
78 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
4861 | PetscCall(VecCUDAResetArray(ctx->x1)); |
79 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
4861 | PetscCall(VecCUDAResetArray(ctx->x2)); |
80 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
4861 | PetscCall(VecCUDAResetArray(ctx->y1)); |
81 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
4861 | PetscCall(VecCUDAResetArray(ctx->y2)); |
82 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
4861 | PetscCall(VecCUDARestoreArrayRead(x,&d_px)); |
83 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
4861 | PetscCall(VecCUDARestoreArrayWrite(y,&d_py)); |
84 | PetscFunctionReturn(PETSC_SUCCESS); | ||
85 | } | ||
86 |