| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | 3553 | /* | |
| 2 | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
| 3 | SLEPc - Scalable Library for Eigenvalue Problem Computations | ||
| 4 | Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain | ||
| 5 | |||
| 6 | This file is part of SLEPc. | ||
| 7 | SLEPc is distributed under a 2-clause BSD license (see LICENSE). | ||
| 8 | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
| 9 | */ | ||
| 10 | /* | ||
| 11 | SLEPc singular value solver: "cyclic" (CUDA implementation) | ||
| 12 | */ | ||
| 13 | #include <slepc/private/svdimpl.h> | ||
| 14 | #include "../src/svd/impls/cyclic/cyclic.h" | ||
| 15 | |||
| 16 | 348 | PetscErrorCode MatMult_Cyclic_CUDA(Mat B,Vec x,Vec y) | |
| 17 | { | ||
| 18 | 348 | SVD_CYCLIC_SHELL *ctx; | |
| 19 | 348 | const PetscScalar *d_px; | |
| 20 | 348 | PetscScalar *d_py; | |
| 21 | 348 | PetscInt m; | |
| 22 | |||
| 23 | 348 | PetscFunctionBegin; | |
| 24 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
348 | PetscCall(MatShellGetContext(B,&ctx)); |
| 25 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
348 | PetscCall(MatGetLocalSize(ctx->A,&m,NULL)); |
| 26 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
348 | PetscCall(VecCUDAGetArrayRead(x,&d_px)); |
| 27 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
348 | PetscCall(VecCUDAGetArrayWrite(y,&d_py)); |
| 28 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
348 | PetscCall(VecCUDAPlaceArray(ctx->x1,d_px)); |
| 29 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
348 | PetscCall(VecCUDAPlaceArray(ctx->x2,d_px+m)); |
| 30 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
348 | PetscCall(VecCUDAPlaceArray(ctx->y1,d_py)); |
| 31 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
348 | PetscCall(VecCUDAPlaceArray(ctx->y2,d_py+m)); |
| 32 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
348 | if (!ctx->misaligned) { |
| 33 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
175 | PetscCall(MatMult(ctx->A,ctx->x2,ctx->y1)); |
| 34 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
175 | PetscCall(MatMult(ctx->AT,ctx->x1,ctx->y2)); |
| 35 | } else { /* prevent CUDA errors when bottom part is misaligned */ | ||
| 36 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
173 | PetscCall(VecCopy(ctx->x2,ctx->wx2)); |
| 37 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
173 | PetscCall(MatMult(ctx->A,ctx->wx2,ctx->y1)); |
| 38 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
173 | PetscCall(MatMult(ctx->AT,ctx->x1,ctx->wy2)); |
| 39 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
173 | PetscCall(VecCopy(ctx->wy2,ctx->y2)); |
| 40 | } | ||
| 41 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
348 | PetscCall(VecCUDAResetArray(ctx->x1)); |
| 42 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
348 | PetscCall(VecCUDAResetArray(ctx->x2)); |
| 43 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
348 | PetscCall(VecCUDAResetArray(ctx->y1)); |
| 44 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
348 | PetscCall(VecCUDAResetArray(ctx->y2)); |
| 45 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
348 | PetscCall(VecCUDARestoreArrayRead(x,&d_px)); |
| 46 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
348 | PetscCall(VecCUDARestoreArrayWrite(y,&d_py)); |
| 47 | PetscFunctionReturn(PETSC_SUCCESS); | ||
| 48 | } | ||
| 49 | |||
| 50 | 4861 | PetscErrorCode MatMult_ECross_CUDA(Mat B,Vec x,Vec y) | |
| 51 | { | ||
| 52 | 4861 | SVD_CYCLIC_SHELL *ctx; | |
| 53 | 4861 | const PetscScalar *d_px; | |
| 54 | 4861 | PetscScalar *d_py; | |
| 55 | 4861 | PetscInt mn,m,n; | |
| 56 | |||
| 57 | 4861 | PetscFunctionBegin; | |
| 58 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
4861 | PetscCall(MatShellGetContext(B,&ctx)); |
| 59 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
4861 | PetscCall(MatGetLocalSize(ctx->A,NULL,&n)); |
| 60 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
4861 | PetscCall(VecGetLocalSize(y,&mn)); |
| 61 | 4861 | m = mn-n; | |
| 62 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
4861 | PetscCall(VecCUDAGetArrayRead(x,&d_px)); |
| 63 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
4861 | PetscCall(VecCUDAGetArrayWrite(y,&d_py)); |
| 64 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
4861 | PetscCall(VecCUDAPlaceArray(ctx->x1,d_px)); |
| 65 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
4861 | PetscCall(VecCUDAPlaceArray(ctx->x2,d_px+m)); |
| 66 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
4861 | PetscCall(VecCUDAPlaceArray(ctx->y1,d_py)); |
| 67 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
4861 | PetscCall(VecCUDAPlaceArray(ctx->y2,d_py+m)); |
| 68 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
4861 | PetscCall(VecCopy(ctx->x1,ctx->y1)); |
| 69 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
4861 | if (!ctx->misaligned) { |
| 70 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
2453 | PetscCall(MatMult(ctx->A,ctx->x2,ctx->w)); |
| 71 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
2453 | PetscCall(MatMult(ctx->AT,ctx->w,ctx->y2)); |
| 72 | } else { /* prevent CUDA errors when bottom part is misaligned */ | ||
| 73 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
2408 | PetscCall(VecCopy(ctx->x2,ctx->wx2)); |
| 74 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
2408 | PetscCall(MatMult(ctx->A,ctx->wx2,ctx->w)); |
| 75 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
2408 | PetscCall(MatMult(ctx->AT,ctx->w,ctx->wy2)); |
| 76 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
2408 | PetscCall(VecCopy(ctx->wy2,ctx->y2)); |
| 77 | } | ||
| 78 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
4861 | PetscCall(VecCUDAResetArray(ctx->x1)); |
| 79 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
4861 | PetscCall(VecCUDAResetArray(ctx->x2)); |
| 80 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
4861 | PetscCall(VecCUDAResetArray(ctx->y1)); |
| 81 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
4861 | PetscCall(VecCUDAResetArray(ctx->y2)); |
| 82 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
4861 | PetscCall(VecCUDARestoreArrayRead(x,&d_px)); |
| 83 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
4861 | PetscCall(VecCUDARestoreArrayWrite(y,&d_py)); |
| 84 | PetscFunctionReturn(PETSC_SUCCESS); | ||
| 85 | } | ||
| 86 |