| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* | ||
| 2 | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
| 3 | SLEPc - Scalable Library for Eigenvalue Problem Computations | ||
| 4 | Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain | ||
| 5 | |||
| 6 | This file is part of SLEPc. | ||
| 7 | SLEPc is distributed under a 2-clause BSD license (see LICENSE). | ||
| 8 | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
| 9 | */ | ||
| 10 | /* | ||
| 11 | Square root function sqrt(x) | ||
| 12 | */ | ||
| 13 | |||
| 14 | #include <slepc/private/fnimpl.h> /*I "slepcfn.h" I*/ | ||
| 15 | #include <slepcblaslapack.h> | ||
| 16 | |||
| 17 | 12077 | static PetscErrorCode FNEvaluateFunction_Sqrt(FN fn,PetscScalar x,PetscScalar *y) | |
| 18 | { | ||
| 19 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
12077 | PetscFunctionBegin; |
| 20 | #if !defined(PETSC_USE_COMPLEX) | ||
| 21 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
4902 | PetscCheck(x>=0.0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Function not defined in the requested value"); |
| 22 | #endif | ||
| 23 | 12077 | *y = PetscSqrtScalar(x); | |
| 24 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
12077 | PetscFunctionReturn(PETSC_SUCCESS); |
| 25 | } | ||
| 26 | |||
| 27 | 690 | static PetscErrorCode FNEvaluateDerivative_Sqrt(FN fn,PetscScalar x,PetscScalar *y) | |
| 28 | { | ||
| 29 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
690 | PetscFunctionBegin; |
| 30 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
690 | PetscCheck(x!=0.0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Derivative not defined in the requested value"); |
| 31 | #if !defined(PETSC_USE_COMPLEX) | ||
| 32 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
140 | PetscCheck(x>0.0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Derivative not defined in the requested value"); |
| 33 | #endif | ||
| 34 | 690 | *y = 1.0/(2.0*PetscSqrtScalar(x)); | |
| 35 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
690 | PetscFunctionReturn(PETSC_SUCCESS); |
| 36 | } | ||
| 37 | |||
| 38 | 206 | static PetscErrorCode FNEvaluateFunctionMat_Sqrt_Schur(FN fn,Mat A,Mat B) | |
| 39 | { | ||
| 40 | 206 | PetscBLASInt n=0; | |
| 41 | 206 | PetscScalar *T; | |
| 42 | 206 | PetscInt m; | |
| 43 | |||
| 44 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
206 | PetscFunctionBegin; |
| 45 |
5/8✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
|
206 | if (A!=B) PetscCall(MatCopy(A,B,SAME_NONZERO_PATTERN)); |
| 46 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
206 | PetscCall(MatDenseGetArray(B,&T)); |
| 47 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
206 | PetscCall(MatGetSize(A,&m,NULL)); |
| 48 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
206 | PetscCall(PetscBLASIntCast(m,&n)); |
| 49 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
206 | PetscCall(FNSqrtmSchur(fn,n,T,n,PETSC_FALSE)); |
| 50 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
206 | PetscCall(MatDenseRestoreArray(B,&T)); |
| 51 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
40 | PetscFunctionReturn(PETSC_SUCCESS); |
| 52 | } | ||
| 53 | |||
| 54 | 174 | static PetscErrorCode FNEvaluateFunctionMatVec_Sqrt_Schur(FN fn,Mat A,Vec v) | |
| 55 | { | ||
| 56 | 174 | PetscBLASInt n=0; | |
| 57 | 174 | PetscScalar *T; | |
| 58 | 174 | PetscInt m; | |
| 59 | 174 | Mat B; | |
| 60 | |||
| 61 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
174 | PetscFunctionBegin; |
| 62 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
174 | PetscCall(FN_AllocateWorkMat(fn,A,&B)); |
| 63 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
174 | PetscCall(MatDenseGetArray(B,&T)); |
| 64 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
174 | PetscCall(MatGetSize(A,&m,NULL)); |
| 65 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
174 | PetscCall(PetscBLASIntCast(m,&n)); |
| 66 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
174 | PetscCall(FNSqrtmSchur(fn,n,T,n,PETSC_TRUE)); |
| 67 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
174 | PetscCall(MatDenseRestoreArray(B,&T)); |
| 68 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
174 | PetscCall(MatGetColumnVector(B,v,0)); |
| 69 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
174 | PetscCall(FN_FreeWorkMat(fn,&B)); |
| 70 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
30 | PetscFunctionReturn(PETSC_SUCCESS); |
| 71 | } | ||
| 72 | |||
| 73 | 126 | static PetscErrorCode FNEvaluateFunctionMat_Sqrt_DBP(FN fn,Mat A,Mat B) | |
| 74 | { | ||
| 75 | 126 | PetscBLASInt n=0; | |
| 76 | 126 | PetscScalar *T; | |
| 77 | 126 | PetscInt m; | |
| 78 | |||
| 79 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
126 | PetscFunctionBegin; |
| 80 |
5/8✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
|
126 | if (A!=B) PetscCall(MatCopy(A,B,SAME_NONZERO_PATTERN)); |
| 81 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
126 | PetscCall(MatDenseGetArray(B,&T)); |
| 82 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
126 | PetscCall(MatGetSize(A,&m,NULL)); |
| 83 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
126 | PetscCall(PetscBLASIntCast(m,&n)); |
| 84 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
126 | PetscCall(FNSqrtmDenmanBeavers(fn,n,T,n,PETSC_FALSE)); |
| 85 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
126 | PetscCall(MatDenseRestoreArray(B,&T)); |
| 86 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
24 | PetscFunctionReturn(PETSC_SUCCESS); |
| 87 | } | ||
| 88 | |||
| 89 | 126 | static PetscErrorCode FNEvaluateFunctionMat_Sqrt_NS(FN fn,Mat A,Mat B) | |
| 90 | { | ||
| 91 | 126 | PetscBLASInt n=0; | |
| 92 | 126 | PetscScalar *Ba; | |
| 93 | 126 | PetscInt m; | |
| 94 | |||
| 95 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
126 | PetscFunctionBegin; |
| 96 |
5/8✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
|
126 | if (A!=B) PetscCall(MatCopy(A,B,SAME_NONZERO_PATTERN)); |
| 97 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
126 | PetscCall(MatDenseGetArray(B,&Ba)); |
| 98 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
126 | PetscCall(MatGetSize(A,&m,NULL)); |
| 99 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
126 | PetscCall(PetscBLASIntCast(m,&n)); |
| 100 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
126 | PetscCall(FNSqrtmNewtonSchulz(fn,n,Ba,n,PETSC_FALSE)); |
| 101 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
126 | PetscCall(MatDenseRestoreArray(B,&Ba)); |
| 102 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
24 | PetscFunctionReturn(PETSC_SUCCESS); |
| 103 | } | ||
| 104 | |||
| 105 | #define MAXIT 50 | ||
| 106 | |||
| 107 | /* | ||
| 108 | Computes the principal square root of the matrix A using the | ||
| 109 | Sadeghi iteration. A is overwritten with sqrtm(A). | ||
| 110 | */ | ||
| 111 | 246 | PetscErrorCode FNSqrtmSadeghi(FN fn,PetscBLASInt n,PetscScalar *A,PetscBLASInt ld) | |
| 112 | { | ||
| 113 | 246 | PetscScalar *M,*M2,*G,*X=A,*work,work1,sqrtnrm; | |
| 114 | 246 | PetscScalar szero=0.0,sone=1.0,smfive=-5.0,s1d16=1.0/16.0; | |
| 115 | 246 | PetscReal tol,Mres=0.0,nrm,rwork[1],done=1.0; | |
| 116 | 246 | PetscInt i,it; | |
| 117 | 246 | PetscBLASInt N,*piv=NULL,info,lwork=0,query=-1,one=1,zero=0; | |
| 118 | 246 | PetscBool converged=PETSC_FALSE; | |
| 119 | 246 | unsigned int ftz; | |
| 120 | |||
| 121 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
246 | PetscFunctionBegin; |
| 122 | 246 | N = n*n; | |
| 123 | 246 | tol = PetscSqrtReal((PetscReal)n)*PETSC_MACHINE_EPSILON/2; | |
| 124 |
3/6✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
246 | PetscCall(SlepcSetFlushToZero(&ftz)); |
| 125 | |||
| 126 | /* query work size */ | ||
| 127 |
10/20✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
246 | PetscCallBLAS("LAPACKgetri",LAPACKgetri_(&n,A,&ld,piv,&work1,&query,&info)); |
| 128 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
246 | PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(work1),&lwork)); |
| 129 | |||
| 130 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
246 | PetscCall(PetscMalloc5(N,&M,N,&M2,N,&G,lwork,&work,n,&piv)); |
| 131 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
246 | PetscCall(PetscArraycpy(M,A,N)); |
| 132 | |||
| 133 | /* scale M */ | ||
| 134 | 246 | nrm = LAPACKlange_("fro",&n,&n,M,&n,rwork); | |
| 135 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
246 | if (nrm>1.0) { |
| 136 | 246 | sqrtnrm = PetscSqrtReal(nrm); | |
| 137 |
10/20✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
246 | PetscCallBLAS("LAPACKlascl",LAPACKlascl_("G",&zero,&zero,&nrm,&done,&N,&one,M,&N,&info)); |
| 138 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
246 | SlepcCheckLapackInfo("lascl",info); |
| 139 | 246 | tol *= nrm; | |
| 140 | } | ||
| 141 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
246 | PetscCall(PetscInfo(fn,"||A||_F = %g, new tol: %g\n",(double)nrm,(double)tol)); |
| 142 | |||
| 143 | /* X = I */ | ||
| 144 |
4/6✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
246 | PetscCall(PetscArrayzero(X,N)); |
| 145 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
13560 | for (i=0;i<n;i++) X[i+i*ld] = 1.0; |
| 146 | |||
| 147 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
1294 | for (it=0;it<MAXIT && !converged;it++) { |
| 148 | |||
| 149 | /* G = (5/16)*I + (1/16)*M*(15*I-5*M+M*M) */ | ||
| 150 |
10/20✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
1048 | PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&sone,M,&ld,M,&ld,&szero,M2,&ld)); |
| 151 |
10/20✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
1048 | PetscCallBLAS("BLASaxpy",BLASaxpy_(&N,&smfive,M,&one,M2,&one)); |
| 152 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
63656 | for (i=0;i<n;i++) M2[i+i*ld] += 15.0; |
| 153 |
10/20✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
1048 | PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&s1d16,M,&ld,M2,&ld,&szero,G,&ld)); |
| 154 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
63656 | for (i=0;i<n;i++) G[i+i*ld] += 5.0/16.0; |
| 155 | |||
| 156 | /* X = X*G */ | ||
| 157 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
1048 | PetscCall(PetscArraycpy(M2,X,N)); |
| 158 |
10/20✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
1048 | PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&sone,M2,&ld,G,&ld,&szero,X,&ld)); |
| 159 | |||
| 160 | /* M = M*inv(G*G) */ | ||
| 161 |
10/20✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
1048 | PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&sone,G,&ld,G,&ld,&szero,M2,&ld)); |
| 162 |
10/20✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
1048 | PetscCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,M2,&ld,piv,&info)); |
| 163 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
1048 | SlepcCheckLapackInfo("getrf",info); |
| 164 |
10/20✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
1048 | PetscCallBLAS("LAPACKgetri",LAPACKgetri_(&n,M2,&ld,piv,work,&lwork,&info)); |
| 165 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
1048 | SlepcCheckLapackInfo("getri",info); |
| 166 | |||
| 167 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
1048 | PetscCall(PetscArraycpy(G,M,N)); |
| 168 |
10/20✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
1048 | PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&sone,G,&ld,M2,&ld,&szero,M,&ld)); |
| 169 | |||
| 170 | /* check ||I-M|| */ | ||
| 171 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
1048 | PetscCall(PetscArraycpy(M2,M,N)); |
| 172 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
63656 | for (i=0;i<n;i++) M2[i+i*ld] -= 1.0; |
| 173 | 1048 | Mres = LAPACKlange_("fro",&n,&n,M2,&n,rwork); | |
| 174 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
1048 | PetscCheck(!PetscIsNanReal(Mres),PETSC_COMM_SELF,PETSC_ERR_FP,"The computed norm is not-a-number"); |
| 175 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
1048 | if (Mres<=tol) converged = PETSC_TRUE; |
| 176 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
1048 | PetscCall(PetscInfo(fn,"it: %" PetscInt_FMT " res: %g\n",it,(double)Mres)); |
| 177 |
3/6✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
1048 | PetscCall(PetscLogFlops(8.0*n*n*n+2.0*n*n+2.0*n*n*n/3.0+4.0*n*n*n/3.0+2.0*n*n*n+2.0*n*n)); |
| 178 | } | ||
| 179 | |||
| 180 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
246 | PetscCheck(Mres<=tol,PETSC_COMM_SELF,PETSC_ERR_LIB,"SQRTM not converged after %d iterations",MAXIT); |
| 181 | |||
| 182 | /* undo scaling */ | ||
| 183 |
11/22✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✓ Branch 9 taken 2 times.
✓ Branch 10 taken 2 times.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✓ Branch 13 taken 2 times.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✓ Branch 16 taken 2 times.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✓ Branch 19 taken 2 times.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
|
246 | if (nrm>1.0) PetscCallBLAS("BLASscal",BLASscal_(&N,&sqrtnrm,A,&one)); |
| 184 | |||
| 185 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
246 | PetscCall(PetscFree5(M,M2,G,work,piv)); |
| 186 |
3/6✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
73 | PetscCall(SlepcResetFlushToZero(&ftz)); |
| 187 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
48 | PetscFunctionReturn(PETSC_SUCCESS); |
| 188 | } | ||
| 189 | |||
| 190 | #if defined(PETSC_HAVE_CUDA) | ||
| 191 | #include "../src/sys/classes/fn/impls/cuda/fnutilcuda.h" | ||
| 192 | #include <slepccupmblas.h> | ||
| 193 | |||
| 194 | #if defined(PETSC_HAVE_MAGMA) | ||
| 195 | #include <slepcmagma.h> | ||
| 196 | |||
| 197 | /* | ||
| 198 | * Matrix square root by Sadeghi iteration. CUDA version. | ||
| 199 | * Computes the principal square root of the matrix A using the | ||
| 200 | * Sadeghi iteration. A is overwritten with sqrtm(A). | ||
| 201 | */ | ||
| 202 | 48 | PetscErrorCode FNSqrtmSadeghi_CUDAm(FN fn,PetscBLASInt n,PetscScalar *d_A,PetscBLASInt ld) | |
| 203 | { | ||
| 204 | 48 | PetscScalar *d_M,*d_M2,*d_G,*d_work,alpha; | |
| 205 | 48 | const PetscScalar szero=0.0,sone=1.0,smfive=-5.0,s15=15.0,s1d16=1.0/16.0; | |
| 206 | 48 | PetscReal tol,Mres=0.0,nrm,sqrtnrm=1.0; | |
| 207 | 48 | PetscInt it,nb,lwork; | |
| 208 | 48 | PetscBLASInt *piv,N; | |
| 209 | 48 | const PetscBLASInt one=1; | |
| 210 | 48 | PetscBool converged=PETSC_FALSE; | |
| 211 | 48 | cublasHandle_t cublasv2handle; | |
| 212 | |||
| 213 | 48 | PetscFunctionBegin; | |
| 214 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
48 | PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA)); /* For CUDA event timers */ |
| 215 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
48 | PetscCall(PetscCUBLASGetHandle(&cublasv2handle)); |
| 216 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
48 | PetscCall(SlepcMagmaInit()); |
| 217 | 48 | N = n*n; | |
| 218 | 48 | tol = PetscSqrtReal((PetscReal)n)*PETSC_MACHINE_EPSILON/2; | |
| 219 | |||
| 220 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
48 | PetscCall(PetscMalloc1(n,&piv)); |
| 221 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
48 | PetscCallCUDA(cudaMalloc((void **)&d_M,sizeof(PetscScalar)*N)); |
| 222 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
48 | PetscCallCUDA(cudaMalloc((void **)&d_M2,sizeof(PetscScalar)*N)); |
| 223 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
48 | PetscCallCUDA(cudaMalloc((void **)&d_G,sizeof(PetscScalar)*N)); |
| 224 | |||
| 225 | 48 | nb = magma_get_xgetri_nb(n); | |
| 226 | 48 | lwork = nb*n; | |
| 227 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
48 | PetscCallCUDA(cudaMalloc((void **)&d_work,sizeof(PetscScalar)*lwork)); |
| 228 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
48 | PetscCall(PetscLogGpuTimeBegin()); |
| 229 | |||
| 230 | /* M = A */ | ||
| 231 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
48 | PetscCallCUDA(cudaMemcpy(d_M,d_A,sizeof(PetscScalar)*N,cudaMemcpyDeviceToDevice)); |
| 232 | |||
| 233 | /* scale M */ | ||
| 234 |
1/10✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
|
48 | PetscCallCUBLAS(cublasXnrm2(cublasv2handle,N,d_M,one,&nrm)); |
| 235 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
48 | if (nrm>1.0) { |
| 236 | 48 | sqrtnrm = PetscSqrtReal(nrm); | |
| 237 | 48 | alpha = 1.0/nrm; | |
| 238 |
1/10✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
|
48 | PetscCallCUBLAS(cublasXscal(cublasv2handle,N,&alpha,d_M,one)); |
| 239 | 48 | tol *= nrm; | |
| 240 | } | ||
| 241 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
48 | PetscCall(PetscInfo(fn,"||A||_F = %g, new tol: %g\n",(double)nrm,(double)tol)); |
| 242 | |||
| 243 | /* X = I */ | ||
| 244 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
48 | PetscCallCUDA(cudaMemset(d_A,0,sizeof(PetscScalar)*N)); |
| 245 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
48 | PetscCall(set_diagonal(n,d_A,ld,sone)); |
| 246 | |||
| 247 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
|
248 | for (it=0;it<MAXIT && !converged;it++) { |
| 248 | |||
| 249 | /* G = (5/16)*I + (1/16)*M*(15*I-5*M+M*M) */ | ||
| 250 |
1/10✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
|
200 | PetscCallCUBLAS(cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n,n,n,&sone,d_M,ld,d_M,ld,&szero,d_M2,ld)); |
| 251 |
1/10✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
|
200 | PetscCallCUBLAS(cublasXaxpy(cublasv2handle,N,&smfive,d_M,one,d_M2,one)); |
| 252 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
200 | PetscCall(shift_diagonal(n,d_M2,ld,s15)); |
| 253 |
1/10✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
|
200 | PetscCallCUBLAS(cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n,n,n,&s1d16,d_M,ld,d_M2,ld,&szero,d_G,ld)); |
| 254 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
200 | PetscCall(shift_diagonal(n,d_G,ld,5.0/16.0)); |
| 255 | |||
| 256 | /* X = X*G */ | ||
| 257 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
200 | PetscCallCUDA(cudaMemcpy(d_M2,d_A,sizeof(PetscScalar)*N,cudaMemcpyDeviceToDevice)); |
| 258 |
1/10✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
|
200 | PetscCallCUBLAS(cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n,n,n,&sone,d_M2,ld,d_G,ld,&szero,d_A,ld)); |
| 259 | |||
| 260 | /* M = M*inv(G*G) */ | ||
| 261 |
1/10✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
|
200 | PetscCallCUBLAS(cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n,n,n,&sone,d_G,ld,d_G,ld,&szero,d_M2,ld)); |
| 262 | /* magma */ | ||
| 263 |
2/6✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
|
200 | PetscCallMAGMA(magma_xgetrf_gpu,n,n,d_M2,ld,piv); |
| 264 |
2/6✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
|
200 | PetscCallMAGMA(magma_xgetri_gpu,n,d_M2,ld,piv,d_work,lwork); |
| 265 | /* magma */ | ||
| 266 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
200 | PetscCallCUDA(cudaMemcpy(d_G,d_M,sizeof(PetscScalar)*N,cudaMemcpyDeviceToDevice)); |
| 267 |
1/10✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
|
200 | PetscCallCUBLAS(cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n,n,n,&sone,d_G,ld,d_M2,ld,&szero,d_M,ld)); |
| 268 | |||
| 269 | /* check ||I-M|| */ | ||
| 270 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
200 | PetscCallCUDA(cudaMemcpy(d_M2,d_M,sizeof(PetscScalar)*N,cudaMemcpyDeviceToDevice)); |
| 271 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
200 | PetscCall(shift_diagonal(n,d_M2,ld,-1.0)); |
| 272 |
1/10✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
|
200 | PetscCallCUBLAS(cublasXnrm2(cublasv2handle,N,d_M2,one,&Mres)); |
| 273 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
200 | PetscCheck(!PetscIsNanReal(Mres),PETSC_COMM_SELF,PETSC_ERR_FP,"The computed norm is not-a-number"); |
| 274 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
|
200 | if (Mres<=tol) converged = PETSC_TRUE; |
| 275 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
200 | PetscCall(PetscInfo(fn,"it: %" PetscInt_FMT " res: %g\n",it,(double)Mres)); |
| 276 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
200 | PetscCall(PetscLogGpuFlops(8.0*n*n*n+2.0*n*n+2.0*n*n*n/3.0+4.0*n*n*n/3.0+2.0*n*n*n+2.0*n*n)); |
| 277 | } | ||
| 278 | |||
| 279 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
48 | PetscCheck(Mres<=tol,PETSC_COMM_SELF,PETSC_ERR_LIB,"SQRTM not converged after %d iterations", MAXIT); |
| 280 | |||
| 281 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
48 | if (nrm>1.0) { |
| 282 | 48 | alpha = sqrtnrm; | |
| 283 |
1/10✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
|
48 | PetscCallCUBLAS(cublasXscal(cublasv2handle,N,&alpha,d_A,one)); |
| 284 | } | ||
| 285 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
48 | PetscCall(PetscLogGpuTimeEnd()); |
| 286 | |||
| 287 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
48 | PetscCallCUDA(cudaFree(d_M)); |
| 288 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
48 | PetscCallCUDA(cudaFree(d_M2)); |
| 289 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
48 | PetscCallCUDA(cudaFree(d_G)); |
| 290 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
48 | PetscCallCUDA(cudaFree(d_work)); |
| 291 |
2/4✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
|
48 | PetscCall(PetscFree(piv)); |
| 292 | PetscFunctionReturn(PETSC_SUCCESS); | ||
| 293 | } | ||
| 294 | #endif /* PETSC_HAVE_MAGMA */ | ||
| 295 | #endif /* PETSC_HAVE_CUDA */ | ||
| 296 | |||
| 297 | 126 | static PetscErrorCode FNEvaluateFunctionMat_Sqrt_Sadeghi(FN fn,Mat A,Mat B) | |
| 298 | { | ||
| 299 | 126 | PetscBLASInt n=0; | |
| 300 | 126 | PetscScalar *Ba; | |
| 301 | 126 | PetscInt m; | |
| 302 | |||
| 303 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
126 | PetscFunctionBegin; |
| 304 |
5/8✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
|
126 | if (A!=B) PetscCall(MatCopy(A,B,SAME_NONZERO_PATTERN)); |
| 305 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
126 | PetscCall(MatDenseGetArray(B,&Ba)); |
| 306 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
126 | PetscCall(MatGetSize(A,&m,NULL)); |
| 307 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
126 | PetscCall(PetscBLASIntCast(m,&n)); |
| 308 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
126 | PetscCall(FNSqrtmSadeghi(fn,n,Ba,n)); |
| 309 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
126 | PetscCall(MatDenseRestoreArray(B,&Ba)); |
| 310 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
24 | PetscFunctionReturn(PETSC_SUCCESS); |
| 311 | } | ||
| 312 | |||
| 313 | #if defined(PETSC_HAVE_CUDA) | ||
| 314 | 24 | PetscErrorCode FNEvaluateFunctionMat_Sqrt_NS_CUDA(FN fn,Mat A,Mat B) | |
| 315 | { | ||
| 316 | 24 | PetscBLASInt n=0; | |
| 317 | 24 | PetscScalar *Ba; | |
| 318 | 24 | PetscInt m; | |
| 319 | |||
| 320 | 24 | PetscFunctionBegin; | |
| 321 |
2/4✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
|
24 | if (A!=B) PetscCall(MatCopy(A,B,SAME_NONZERO_PATTERN)); |
| 322 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
24 | PetscCall(MatDenseCUDAGetArray(B,&Ba)); |
| 323 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
24 | PetscCall(MatGetSize(A,&m,NULL)); |
| 324 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
24 | PetscCall(PetscBLASIntCast(m,&n)); |
| 325 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
24 | PetscCall(FNSqrtmNewtonSchulz_CUDA(fn,n,Ba,n,PETSC_FALSE)); |
| 326 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
24 | PetscCall(MatDenseCUDARestoreArray(B,&Ba)); |
| 327 | PetscFunctionReturn(PETSC_SUCCESS); | ||
| 328 | } | ||
| 329 | |||
| 330 | #if defined(PETSC_HAVE_MAGMA) | ||
| 331 | 24 | PetscErrorCode FNEvaluateFunctionMat_Sqrt_DBP_CUDAm(FN fn,Mat A,Mat B) | |
| 332 | { | ||
| 333 | 24 | PetscBLASInt n=0; | |
| 334 | 24 | PetscScalar *T; | |
| 335 | 24 | PetscInt m; | |
| 336 | |||
| 337 | 24 | PetscFunctionBegin; | |
| 338 |
2/4✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
|
24 | if (A!=B) PetscCall(MatCopy(A,B,SAME_NONZERO_PATTERN)); |
| 339 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
24 | PetscCall(MatDenseCUDAGetArray(B,&T)); |
| 340 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
24 | PetscCall(MatGetSize(A,&m,NULL)); |
| 341 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
24 | PetscCall(PetscBLASIntCast(m,&n)); |
| 342 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
24 | PetscCall(FNSqrtmDenmanBeavers_CUDAm(fn,n,T,n,PETSC_FALSE)); |
| 343 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
24 | PetscCall(MatDenseCUDARestoreArray(B,&T)); |
| 344 | PetscFunctionReturn(PETSC_SUCCESS); | ||
| 345 | } | ||
| 346 | |||
| 347 | 24 | PetscErrorCode FNEvaluateFunctionMat_Sqrt_Sadeghi_CUDAm(FN fn,Mat A,Mat B) | |
| 348 | { | ||
| 349 | 24 | PetscBLASInt n=0; | |
| 350 | 24 | PetscScalar *Ba; | |
| 351 | 24 | PetscInt m; | |
| 352 | |||
| 353 | 24 | PetscFunctionBegin; | |
| 354 |
2/4✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
|
24 | if (A!=B) PetscCall(MatCopy(A,B,SAME_NONZERO_PATTERN)); |
| 355 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
24 | PetscCall(MatDenseCUDAGetArray(B,&Ba)); |
| 356 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
24 | PetscCall(MatGetSize(A,&m,NULL)); |
| 357 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
24 | PetscCall(PetscBLASIntCast(m,&n)); |
| 358 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
24 | PetscCall(FNSqrtmSadeghi_CUDAm(fn,n,Ba,n)); |
| 359 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
24 | PetscCall(MatDenseCUDARestoreArray(B,&Ba)); |
| 360 | PetscFunctionReturn(PETSC_SUCCESS); | ||
| 361 | } | ||
| 362 | #endif /* PETSC_HAVE_MAGMA */ | ||
| 363 | #endif /* PETSC_HAVE_CUDA */ | ||
| 364 | |||
| 365 | 194 | static PetscErrorCode FNView_Sqrt(FN fn,PetscViewer viewer) | |
| 366 | { | ||
| 367 | 194 | PetscBool isascii; | |
| 368 | 194 | char str[50]; | |
| 369 | 194 | const char *methodname[] = { | |
| 370 | "Schur method for the square root", | ||
| 371 | "Denman-Beavers (product form)", | ||
| 372 | "Newton-Schulz iteration", | ||
| 373 | "Sadeghi iteration" | ||
| 374 | }; | ||
| 375 | 194 | const int nmeth=PETSC_STATIC_ARRAY_LENGTH(methodname); | |
| 376 | |||
| 377 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
194 | PetscFunctionBegin; |
| 378 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
194 | PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii)); |
| 379 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
194 | if (isascii) { |
| 380 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
194 | if (fn->beta==(PetscScalar)1.0) { |
| 381 |
5/8✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
|
18 | if (fn->alpha==(PetscScalar)1.0) PetscCall(PetscViewerASCIIPrintf(viewer," square root: sqrt(x)\n")); |
| 382 | else { | ||
| 383 | ✗ | PetscCall(SlepcSNPrintfScalar(str,sizeof(str),fn->alpha,PETSC_TRUE)); | |
| 384 | ✗ | PetscCall(PetscViewerASCIIPrintf(viewer," square root: sqrt(%s*x)\n",str)); | |
| 385 | } | ||
| 386 | } else { | ||
| 387 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
176 | PetscCall(SlepcSNPrintfScalar(str,sizeof(str),fn->beta,PETSC_TRUE)); |
| 388 |
1/8✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
|
176 | if (fn->alpha==(PetscScalar)1.0) PetscCall(PetscViewerASCIIPrintf(viewer," square root: %s*sqrt(x)\n",str)); |
| 389 | else { | ||
| 390 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
176 | PetscCall(PetscViewerASCIIPrintf(viewer," square root: %s",str)); |
| 391 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
176 | PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_FALSE)); |
| 392 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
176 | PetscCall(SlepcSNPrintfScalar(str,sizeof(str),fn->alpha,PETSC_TRUE)); |
| 393 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
176 | PetscCall(PetscViewerASCIIPrintf(viewer,"*sqrt(%s*x)\n",str)); |
| 394 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
176 | PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_TRUE)); |
| 395 | } | ||
| 396 | } | ||
| 397 |
5/8✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
|
194 | if (fn->method<nmeth) PetscCall(PetscViewerASCIIPrintf(viewer," computing matrix functions with: %s\n",methodname[fn->method])); |
| 398 | } | ||
| 399 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
31 | PetscFunctionReturn(PETSC_SUCCESS); |
| 400 | } | ||
| 401 | |||
| 402 | /*MC | ||
| 403 | FNSQRT - FNSQRT = "sqrt" - The square root function $f(x)=\sqrt{x}$. | ||
| 404 | |||
| 405 | Level: beginner | ||
| 406 | |||
| 407 | .seealso: [](sec:fn), `FN`, `FNType`, `FNSetType()` | ||
| 408 | M*/ | ||
| 409 | |||
| 410 | 368 | SLEPC_EXTERN PetscErrorCode FNCreate_Sqrt(FN fn) | |
| 411 | { | ||
| 412 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
368 | PetscFunctionBegin; |
| 413 | 368 | fn->ops->evaluatefunction = FNEvaluateFunction_Sqrt; | |
| 414 | 368 | fn->ops->evaluatederivative = FNEvaluateDerivative_Sqrt; | |
| 415 | 368 | fn->ops->evaluatefunctionmat[0] = FNEvaluateFunctionMat_Sqrt_Schur; | |
| 416 | 368 | fn->ops->evaluatefunctionmat[1] = FNEvaluateFunctionMat_Sqrt_DBP; | |
| 417 | 368 | fn->ops->evaluatefunctionmat[2] = FNEvaluateFunctionMat_Sqrt_NS; | |
| 418 | 368 | fn->ops->evaluatefunctionmat[3] = FNEvaluateFunctionMat_Sqrt_Sadeghi; | |
| 419 | #if defined(PETSC_HAVE_CUDA) | ||
| 420 | 82 | fn->ops->evaluatefunctionmatcuda[2] = FNEvaluateFunctionMat_Sqrt_NS_CUDA; | |
| 421 | #if defined(PETSC_HAVE_MAGMA) | ||
| 422 | 82 | fn->ops->evaluatefunctionmatcuda[1] = FNEvaluateFunctionMat_Sqrt_DBP_CUDAm; | |
| 423 | 82 | fn->ops->evaluatefunctionmatcuda[3] = FNEvaluateFunctionMat_Sqrt_Sadeghi_CUDAm; | |
| 424 | #endif /* PETSC_HAVE_MAGMA */ | ||
| 425 | #endif /* PETSC_HAVE_CUDA */ | ||
| 426 | 368 | fn->ops->evaluatefunctionmatvec[0] = FNEvaluateFunctionMatVec_Sqrt_Schur; | |
| 427 | 368 | fn->ops->view = FNView_Sqrt; | |
| 428 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
368 | PetscFunctionReturn(PETSC_SUCCESS); |
| 429 | } | ||
| 430 |