Line | Branch | Exec | Source |
---|---|---|---|
1 | /* | ||
2 | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
3 | SLEPc - Scalable Library for Eigenvalue Problem Computations | ||
4 | Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain | ||
5 | |||
6 | This file is part of SLEPc. | ||
7 | SLEPc is distributed under a 2-clause BSD license (see LICENSE). | ||
8 | - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | ||
9 | */ | ||
10 | /* | ||
11 | Square root function sqrt(x) | ||
12 | */ | ||
13 | |||
14 | #include <slepc/private/fnimpl.h> /*I "slepcfn.h" I*/ | ||
15 | #include <slepcblaslapack.h> | ||
16 | |||
17 | 12077 | static PetscErrorCode FNEvaluateFunction_Sqrt(FN fn,PetscScalar x,PetscScalar *y) | |
18 | { | ||
19 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
12077 | PetscFunctionBegin; |
20 | #if !defined(PETSC_USE_COMPLEX) | ||
21 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
4902 | PetscCheck(x>=0.0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Function not defined in the requested value"); |
22 | #endif | ||
23 | 12077 | *y = PetscSqrtScalar(x); | |
24 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
12077 | PetscFunctionReturn(PETSC_SUCCESS); |
25 | } | ||
26 | |||
27 | 690 | static PetscErrorCode FNEvaluateDerivative_Sqrt(FN fn,PetscScalar x,PetscScalar *y) | |
28 | { | ||
29 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
690 | PetscFunctionBegin; |
30 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
690 | PetscCheck(x!=0.0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Derivative not defined in the requested value"); |
31 | #if !defined(PETSC_USE_COMPLEX) | ||
32 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
140 | PetscCheck(x>0.0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Derivative not defined in the requested value"); |
33 | #endif | ||
34 | 690 | *y = 1.0/(2.0*PetscSqrtScalar(x)); | |
35 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
690 | PetscFunctionReturn(PETSC_SUCCESS); |
36 | } | ||
37 | |||
38 | 206 | static PetscErrorCode FNEvaluateFunctionMat_Sqrt_Schur(FN fn,Mat A,Mat B) | |
39 | { | ||
40 | 206 | PetscBLASInt n=0; | |
41 | 206 | PetscScalar *T; | |
42 | 206 | PetscInt m; | |
43 | |||
44 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
206 | PetscFunctionBegin; |
45 |
5/8✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
|
206 | if (A!=B) PetscCall(MatCopy(A,B,SAME_NONZERO_PATTERN)); |
46 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
206 | PetscCall(MatDenseGetArray(B,&T)); |
47 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
206 | PetscCall(MatGetSize(A,&m,NULL)); |
48 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
206 | PetscCall(PetscBLASIntCast(m,&n)); |
49 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
206 | PetscCall(FNSqrtmSchur(fn,n,T,n,PETSC_FALSE)); |
50 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
206 | PetscCall(MatDenseRestoreArray(B,&T)); |
51 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
40 | PetscFunctionReturn(PETSC_SUCCESS); |
52 | } | ||
53 | |||
54 | 174 | static PetscErrorCode FNEvaluateFunctionMatVec_Sqrt_Schur(FN fn,Mat A,Vec v) | |
55 | { | ||
56 | 174 | PetscBLASInt n=0; | |
57 | 174 | PetscScalar *T; | |
58 | 174 | PetscInt m; | |
59 | 174 | Mat B; | |
60 | |||
61 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
174 | PetscFunctionBegin; |
62 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
174 | PetscCall(FN_AllocateWorkMat(fn,A,&B)); |
63 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
174 | PetscCall(MatDenseGetArray(B,&T)); |
64 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
174 | PetscCall(MatGetSize(A,&m,NULL)); |
65 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
174 | PetscCall(PetscBLASIntCast(m,&n)); |
66 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
174 | PetscCall(FNSqrtmSchur(fn,n,T,n,PETSC_TRUE)); |
67 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
174 | PetscCall(MatDenseRestoreArray(B,&T)); |
68 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
174 | PetscCall(MatGetColumnVector(B,v,0)); |
69 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
174 | PetscCall(FN_FreeWorkMat(fn,&B)); |
70 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
30 | PetscFunctionReturn(PETSC_SUCCESS); |
71 | } | ||
72 | |||
73 | 126 | static PetscErrorCode FNEvaluateFunctionMat_Sqrt_DBP(FN fn,Mat A,Mat B) | |
74 | { | ||
75 | 126 | PetscBLASInt n=0; | |
76 | 126 | PetscScalar *T; | |
77 | 126 | PetscInt m; | |
78 | |||
79 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
126 | PetscFunctionBegin; |
80 |
5/8✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
|
126 | if (A!=B) PetscCall(MatCopy(A,B,SAME_NONZERO_PATTERN)); |
81 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
126 | PetscCall(MatDenseGetArray(B,&T)); |
82 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
126 | PetscCall(MatGetSize(A,&m,NULL)); |
83 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
126 | PetscCall(PetscBLASIntCast(m,&n)); |
84 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
126 | PetscCall(FNSqrtmDenmanBeavers(fn,n,T,n,PETSC_FALSE)); |
85 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
126 | PetscCall(MatDenseRestoreArray(B,&T)); |
86 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
24 | PetscFunctionReturn(PETSC_SUCCESS); |
87 | } | ||
88 | |||
89 | 126 | static PetscErrorCode FNEvaluateFunctionMat_Sqrt_NS(FN fn,Mat A,Mat B) | |
90 | { | ||
91 | 126 | PetscBLASInt n=0; | |
92 | 126 | PetscScalar *Ba; | |
93 | 126 | PetscInt m; | |
94 | |||
95 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
126 | PetscFunctionBegin; |
96 |
5/8✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
|
126 | if (A!=B) PetscCall(MatCopy(A,B,SAME_NONZERO_PATTERN)); |
97 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
126 | PetscCall(MatDenseGetArray(B,&Ba)); |
98 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
126 | PetscCall(MatGetSize(A,&m,NULL)); |
99 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
126 | PetscCall(PetscBLASIntCast(m,&n)); |
100 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
126 | PetscCall(FNSqrtmNewtonSchulz(fn,n,Ba,n,PETSC_FALSE)); |
101 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
126 | PetscCall(MatDenseRestoreArray(B,&Ba)); |
102 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
24 | PetscFunctionReturn(PETSC_SUCCESS); |
103 | } | ||
104 | |||
105 | #define MAXIT 50 | ||
106 | |||
107 | /* | ||
108 | Computes the principal square root of the matrix A using the | ||
109 | Sadeghi iteration. A is overwritten with sqrtm(A). | ||
110 | */ | ||
111 | 246 | PetscErrorCode FNSqrtmSadeghi(FN fn,PetscBLASInt n,PetscScalar *A,PetscBLASInt ld) | |
112 | { | ||
113 | 246 | PetscScalar *M,*M2,*G,*X=A,*work,work1,sqrtnrm; | |
114 | 246 | PetscScalar szero=0.0,sone=1.0,smfive=-5.0,s1d16=1.0/16.0; | |
115 | 246 | PetscReal tol,Mres=0.0,nrm,rwork[1],done=1.0; | |
116 | 246 | PetscInt i,it; | |
117 | 246 | PetscBLASInt N,*piv=NULL,info,lwork=0,query=-1,one=1,zero=0; | |
118 | 246 | PetscBool converged=PETSC_FALSE; | |
119 | 246 | unsigned int ftz; | |
120 | |||
121 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
246 | PetscFunctionBegin; |
122 | 246 | N = n*n; | |
123 | 246 | tol = PetscSqrtReal((PetscReal)n)*PETSC_MACHINE_EPSILON/2; | |
124 |
3/6✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
246 | PetscCall(SlepcSetFlushToZero(&ftz)); |
125 | |||
126 | /* query work size */ | ||
127 |
10/20✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
246 | PetscCallBLAS("LAPACKgetri",LAPACKgetri_(&n,A,&ld,piv,&work1,&query,&info)); |
128 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
246 | PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(work1),&lwork)); |
129 | |||
130 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
246 | PetscCall(PetscMalloc5(N,&M,N,&M2,N,&G,lwork,&work,n,&piv)); |
131 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
246 | PetscCall(PetscArraycpy(M,A,N)); |
132 | |||
133 | /* scale M */ | ||
134 | 246 | nrm = LAPACKlange_("fro",&n,&n,M,&n,rwork); | |
135 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
246 | if (nrm>1.0) { |
136 | 246 | sqrtnrm = PetscSqrtReal(nrm); | |
137 |
10/20✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
246 | PetscCallBLAS("LAPACKlascl",LAPACKlascl_("G",&zero,&zero,&nrm,&done,&N,&one,M,&N,&info)); |
138 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
246 | SlepcCheckLapackInfo("lascl",info); |
139 | 246 | tol *= nrm; | |
140 | } | ||
141 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
246 | PetscCall(PetscInfo(fn,"||A||_F = %g, new tol: %g\n",(double)nrm,(double)tol)); |
142 | |||
143 | /* X = I */ | ||
144 |
4/6✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
246 | PetscCall(PetscArrayzero(X,N)); |
145 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
13560 | for (i=0;i<n;i++) X[i+i*ld] = 1.0; |
146 | |||
147 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
1294 | for (it=0;it<MAXIT && !converged;it++) { |
148 | |||
149 | /* G = (5/16)*I + (1/16)*M*(15*I-5*M+M*M) */ | ||
150 |
10/20✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
1048 | PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&sone,M,&ld,M,&ld,&szero,M2,&ld)); |
151 |
10/20✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
1048 | PetscCallBLAS("BLASaxpy",BLASaxpy_(&N,&smfive,M,&one,M2,&one)); |
152 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
63656 | for (i=0;i<n;i++) M2[i+i*ld] += 15.0; |
153 |
10/20✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
1048 | PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&s1d16,M,&ld,M2,&ld,&szero,G,&ld)); |
154 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
63656 | for (i=0;i<n;i++) G[i+i*ld] += 5.0/16.0; |
155 | |||
156 | /* X = X*G */ | ||
157 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
1048 | PetscCall(PetscArraycpy(M2,X,N)); |
158 |
10/20✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
1048 | PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&sone,M2,&ld,G,&ld,&szero,X,&ld)); |
159 | |||
160 | /* M = M*inv(G*G) */ | ||
161 |
10/20✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
1048 | PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&sone,G,&ld,G,&ld,&szero,M2,&ld)); |
162 |
10/20✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
1048 | PetscCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,M2,&ld,piv,&info)); |
163 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
1048 | SlepcCheckLapackInfo("getrf",info); |
164 |
10/20✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
1048 | PetscCallBLAS("LAPACKgetri",LAPACKgetri_(&n,M2,&ld,piv,work,&lwork,&info)); |
165 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
1048 | SlepcCheckLapackInfo("getri",info); |
166 | |||
167 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
1048 | PetscCall(PetscArraycpy(G,M,N)); |
168 |
10/20✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
|
1048 | PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&sone,G,&ld,M2,&ld,&szero,M,&ld)); |
169 | |||
170 | /* check ||I-M|| */ | ||
171 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
1048 | PetscCall(PetscArraycpy(M2,M,N)); |
172 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
63656 | for (i=0;i<n;i++) M2[i+i*ld] -= 1.0; |
173 | 1048 | Mres = LAPACKlange_("fro",&n,&n,M2,&n,rwork); | |
174 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
1048 | PetscCheck(!PetscIsNanReal(Mres),PETSC_COMM_SELF,PETSC_ERR_FP,"The computed norm is not-a-number"); |
175 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
1048 | if (Mres<=tol) converged = PETSC_TRUE; |
176 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
1048 | PetscCall(PetscInfo(fn,"it: %" PetscInt_FMT " res: %g\n",it,(double)Mres)); |
177 |
3/6✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
1048 | PetscCall(PetscLogFlops(8.0*n*n*n+2.0*n*n+2.0*n*n*n/3.0+4.0*n*n*n/3.0+2.0*n*n*n+2.0*n*n)); |
178 | } | ||
179 | |||
180 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
246 | PetscCheck(Mres<=tol,PETSC_COMM_SELF,PETSC_ERR_LIB,"SQRTM not converged after %d iterations",MAXIT); |
181 | |||
182 | /* undo scaling */ | ||
183 |
11/22✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✓ Branch 9 taken 2 times.
✓ Branch 10 taken 2 times.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✓ Branch 13 taken 2 times.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✓ Branch 16 taken 2 times.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✓ Branch 19 taken 2 times.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
|
246 | if (nrm>1.0) PetscCallBLAS("BLASscal",BLASscal_(&N,&sqrtnrm,A,&one)); |
184 | |||
185 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
246 | PetscCall(PetscFree5(M,M2,G,work,piv)); |
186 |
3/6✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
73 | PetscCall(SlepcResetFlushToZero(&ftz)); |
187 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
48 | PetscFunctionReturn(PETSC_SUCCESS); |
188 | } | ||
189 | |||
190 | #if defined(PETSC_HAVE_CUDA) | ||
191 | #include "../src/sys/classes/fn/impls/cuda/fnutilcuda.h" | ||
192 | #include <slepccupmblas.h> | ||
193 | |||
194 | #if defined(PETSC_HAVE_MAGMA) | ||
195 | #include <slepcmagma.h> | ||
196 | |||
197 | /* | ||
198 | * Matrix square root by Sadeghi iteration. CUDA version. | ||
199 | * Computes the principal square root of the matrix A using the | ||
200 | * Sadeghi iteration. A is overwritten with sqrtm(A). | ||
201 | */ | ||
202 | 48 | PetscErrorCode FNSqrtmSadeghi_CUDAm(FN fn,PetscBLASInt n,PetscScalar *d_A,PetscBLASInt ld) | |
203 | { | ||
204 | 48 | PetscScalar *d_M,*d_M2,*d_G,*d_work,alpha; | |
205 | 48 | const PetscScalar szero=0.0,sone=1.0,smfive=-5.0,s15=15.0,s1d16=1.0/16.0; | |
206 | 48 | PetscReal tol,Mres=0.0,nrm,sqrtnrm=1.0; | |
207 | 48 | PetscInt it,nb,lwork; | |
208 | 48 | PetscBLASInt *piv,N; | |
209 | 48 | const PetscBLASInt one=1; | |
210 | 48 | PetscBool converged=PETSC_FALSE; | |
211 | 48 | cublasHandle_t cublasv2handle; | |
212 | |||
213 | 48 | PetscFunctionBegin; | |
214 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
48 | PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA)); /* For CUDA event timers */ |
215 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
48 | PetscCall(PetscCUBLASGetHandle(&cublasv2handle)); |
216 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
48 | PetscCall(SlepcMagmaInit()); |
217 | 48 | N = n*n; | |
218 | 48 | tol = PetscSqrtReal((PetscReal)n)*PETSC_MACHINE_EPSILON/2; | |
219 | |||
220 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
48 | PetscCall(PetscMalloc1(n,&piv)); |
221 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
48 | PetscCallCUDA(cudaMalloc((void **)&d_M,sizeof(PetscScalar)*N)); |
222 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
48 | PetscCallCUDA(cudaMalloc((void **)&d_M2,sizeof(PetscScalar)*N)); |
223 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
48 | PetscCallCUDA(cudaMalloc((void **)&d_G,sizeof(PetscScalar)*N)); |
224 | |||
225 | 48 | nb = magma_get_xgetri_nb(n); | |
226 | 48 | lwork = nb*n; | |
227 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
48 | PetscCallCUDA(cudaMalloc((void **)&d_work,sizeof(PetscScalar)*lwork)); |
228 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
48 | PetscCall(PetscLogGpuTimeBegin()); |
229 | |||
230 | /* M = A */ | ||
231 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
48 | PetscCallCUDA(cudaMemcpy(d_M,d_A,sizeof(PetscScalar)*N,cudaMemcpyDeviceToDevice)); |
232 | |||
233 | /* scale M */ | ||
234 |
1/10✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
|
48 | PetscCallCUBLAS(cublasXnrm2(cublasv2handle,N,d_M,one,&nrm)); |
235 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
48 | if (nrm>1.0) { |
236 | 48 | sqrtnrm = PetscSqrtReal(nrm); | |
237 | 48 | alpha = 1.0/nrm; | |
238 |
1/10✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
|
48 | PetscCallCUBLAS(cublasXscal(cublasv2handle,N,&alpha,d_M,one)); |
239 | 48 | tol *= nrm; | |
240 | } | ||
241 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
48 | PetscCall(PetscInfo(fn,"||A||_F = %g, new tol: %g\n",(double)nrm,(double)tol)); |
242 | |||
243 | /* X = I */ | ||
244 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
48 | PetscCallCUDA(cudaMemset(d_A,0,sizeof(PetscScalar)*N)); |
245 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
48 | PetscCall(set_diagonal(n,d_A,ld,sone)); |
246 | |||
247 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
|
248 | for (it=0;it<MAXIT && !converged;it++) { |
248 | |||
249 | /* G = (5/16)*I + (1/16)*M*(15*I-5*M+M*M) */ | ||
250 |
1/10✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
|
200 | PetscCallCUBLAS(cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n,n,n,&sone,d_M,ld,d_M,ld,&szero,d_M2,ld)); |
251 |
1/10✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
|
200 | PetscCallCUBLAS(cublasXaxpy(cublasv2handle,N,&smfive,d_M,one,d_M2,one)); |
252 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
200 | PetscCall(shift_diagonal(n,d_M2,ld,s15)); |
253 |
1/10✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
|
200 | PetscCallCUBLAS(cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n,n,n,&s1d16,d_M,ld,d_M2,ld,&szero,d_G,ld)); |
254 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
200 | PetscCall(shift_diagonal(n,d_G,ld,5.0/16.0)); |
255 | |||
256 | /* X = X*G */ | ||
257 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
200 | PetscCallCUDA(cudaMemcpy(d_M2,d_A,sizeof(PetscScalar)*N,cudaMemcpyDeviceToDevice)); |
258 |
1/10✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
|
200 | PetscCallCUBLAS(cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n,n,n,&sone,d_M2,ld,d_G,ld,&szero,d_A,ld)); |
259 | |||
260 | /* M = M*inv(G*G) */ | ||
261 |
1/10✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
|
200 | PetscCallCUBLAS(cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n,n,n,&sone,d_G,ld,d_G,ld,&szero,d_M2,ld)); |
262 | /* magma */ | ||
263 |
2/6✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
|
200 | PetscCallMAGMA(magma_xgetrf_gpu,n,n,d_M2,ld,piv); |
264 |
2/6✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
|
200 | PetscCallMAGMA(magma_xgetri_gpu,n,d_M2,ld,piv,d_work,lwork); |
265 | /* magma */ | ||
266 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
200 | PetscCallCUDA(cudaMemcpy(d_G,d_M,sizeof(PetscScalar)*N,cudaMemcpyDeviceToDevice)); |
267 |
1/10✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
|
200 | PetscCallCUBLAS(cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n,n,n,&sone,d_G,ld,d_M2,ld,&szero,d_M,ld)); |
268 | |||
269 | /* check ||I-M|| */ | ||
270 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
200 | PetscCallCUDA(cudaMemcpy(d_M2,d_M,sizeof(PetscScalar)*N,cudaMemcpyDeviceToDevice)); |
271 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
200 | PetscCall(shift_diagonal(n,d_M2,ld,-1.0)); |
272 |
1/10✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
|
200 | PetscCallCUBLAS(cublasXnrm2(cublasv2handle,N,d_M2,one,&Mres)); |
273 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
200 | PetscCheck(!PetscIsNanReal(Mres),PETSC_COMM_SELF,PETSC_ERR_FP,"The computed norm is not-a-number"); |
274 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
|
200 | if (Mres<=tol) converged = PETSC_TRUE; |
275 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
200 | PetscCall(PetscInfo(fn,"it: %" PetscInt_FMT " res: %g\n",it,(double)Mres)); |
276 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
200 | PetscCall(PetscLogGpuFlops(8.0*n*n*n+2.0*n*n+2.0*n*n*n/3.0+4.0*n*n*n/3.0+2.0*n*n*n+2.0*n*n)); |
277 | } | ||
278 | |||
279 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
48 | PetscCheck(Mres<=tol,PETSC_COMM_SELF,PETSC_ERR_LIB,"SQRTM not converged after %d iterations", MAXIT); |
280 | |||
281 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
48 | if (nrm>1.0) { |
282 | 48 | alpha = sqrtnrm; | |
283 |
1/10✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
|
48 | PetscCallCUBLAS(cublasXscal(cublasv2handle,N,&alpha,d_A,one)); |
284 | } | ||
285 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
48 | PetscCall(PetscLogGpuTimeEnd()); |
286 | |||
287 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
48 | PetscCallCUDA(cudaFree(d_M)); |
288 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
48 | PetscCallCUDA(cudaFree(d_M2)); |
289 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
48 | PetscCallCUDA(cudaFree(d_G)); |
290 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
48 | PetscCallCUDA(cudaFree(d_work)); |
291 |
2/4✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
|
48 | PetscCall(PetscFree(piv)); |
292 | PetscFunctionReturn(PETSC_SUCCESS); | ||
293 | } | ||
294 | #endif /* PETSC_HAVE_MAGMA */ | ||
295 | #endif /* PETSC_HAVE_CUDA */ | ||
296 | |||
297 | 126 | static PetscErrorCode FNEvaluateFunctionMat_Sqrt_Sadeghi(FN fn,Mat A,Mat B) | |
298 | { | ||
299 | 126 | PetscBLASInt n=0; | |
300 | 126 | PetscScalar *Ba; | |
301 | 126 | PetscInt m; | |
302 | |||
303 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
126 | PetscFunctionBegin; |
304 |
5/8✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
|
126 | if (A!=B) PetscCall(MatCopy(A,B,SAME_NONZERO_PATTERN)); |
305 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
126 | PetscCall(MatDenseGetArray(B,&Ba)); |
306 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
126 | PetscCall(MatGetSize(A,&m,NULL)); |
307 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
126 | PetscCall(PetscBLASIntCast(m,&n)); |
308 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
126 | PetscCall(FNSqrtmSadeghi(fn,n,Ba,n)); |
309 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
126 | PetscCall(MatDenseRestoreArray(B,&Ba)); |
310 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
24 | PetscFunctionReturn(PETSC_SUCCESS); |
311 | } | ||
312 | |||
313 | #if defined(PETSC_HAVE_CUDA) | ||
314 | 24 | PetscErrorCode FNEvaluateFunctionMat_Sqrt_NS_CUDA(FN fn,Mat A,Mat B) | |
315 | { | ||
316 | 24 | PetscBLASInt n=0; | |
317 | 24 | PetscScalar *Ba; | |
318 | 24 | PetscInt m; | |
319 | |||
320 | 24 | PetscFunctionBegin; | |
321 |
2/4✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
|
24 | if (A!=B) PetscCall(MatCopy(A,B,SAME_NONZERO_PATTERN)); |
322 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
24 | PetscCall(MatDenseCUDAGetArray(B,&Ba)); |
323 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
24 | PetscCall(MatGetSize(A,&m,NULL)); |
324 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
24 | PetscCall(PetscBLASIntCast(m,&n)); |
325 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
24 | PetscCall(FNSqrtmNewtonSchulz_CUDA(fn,n,Ba,n,PETSC_FALSE)); |
326 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
24 | PetscCall(MatDenseCUDARestoreArray(B,&Ba)); |
327 | PetscFunctionReturn(PETSC_SUCCESS); | ||
328 | } | ||
329 | |||
330 | #if defined(PETSC_HAVE_MAGMA) | ||
331 | 24 | PetscErrorCode FNEvaluateFunctionMat_Sqrt_DBP_CUDAm(FN fn,Mat A,Mat B) | |
332 | { | ||
333 | 24 | PetscBLASInt n=0; | |
334 | 24 | PetscScalar *T; | |
335 | 24 | PetscInt m; | |
336 | |||
337 | 24 | PetscFunctionBegin; | |
338 |
2/4✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
|
24 | if (A!=B) PetscCall(MatCopy(A,B,SAME_NONZERO_PATTERN)); |
339 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
24 | PetscCall(MatDenseCUDAGetArray(B,&T)); |
340 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
24 | PetscCall(MatGetSize(A,&m,NULL)); |
341 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
24 | PetscCall(PetscBLASIntCast(m,&n)); |
342 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
24 | PetscCall(FNSqrtmDenmanBeavers_CUDAm(fn,n,T,n,PETSC_FALSE)); |
343 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
24 | PetscCall(MatDenseCUDARestoreArray(B,&T)); |
344 | PetscFunctionReturn(PETSC_SUCCESS); | ||
345 | } | ||
346 | |||
347 | 24 | PetscErrorCode FNEvaluateFunctionMat_Sqrt_Sadeghi_CUDAm(FN fn,Mat A,Mat B) | |
348 | { | ||
349 | 24 | PetscBLASInt n=0; | |
350 | 24 | PetscScalar *Ba; | |
351 | 24 | PetscInt m; | |
352 | |||
353 | 24 | PetscFunctionBegin; | |
354 |
2/4✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
|
24 | if (A!=B) PetscCall(MatCopy(A,B,SAME_NONZERO_PATTERN)); |
355 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
24 | PetscCall(MatDenseCUDAGetArray(B,&Ba)); |
356 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
24 | PetscCall(MatGetSize(A,&m,NULL)); |
357 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
24 | PetscCall(PetscBLASIntCast(m,&n)); |
358 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
24 | PetscCall(FNSqrtmSadeghi_CUDAm(fn,n,Ba,n)); |
359 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
24 | PetscCall(MatDenseCUDARestoreArray(B,&Ba)); |
360 | PetscFunctionReturn(PETSC_SUCCESS); | ||
361 | } | ||
362 | #endif /* PETSC_HAVE_MAGMA */ | ||
363 | #endif /* PETSC_HAVE_CUDA */ | ||
364 | |||
365 | 194 | static PetscErrorCode FNView_Sqrt(FN fn,PetscViewer viewer) | |
366 | { | ||
367 | 194 | PetscBool isascii; | |
368 | 194 | char str[50]; | |
369 | 194 | const char *methodname[] = { | |
370 | "Schur method for the square root", | ||
371 | "Denman-Beavers (product form)", | ||
372 | "Newton-Schulz iteration", | ||
373 | "Sadeghi iteration" | ||
374 | }; | ||
375 | 194 | const int nmeth=PETSC_STATIC_ARRAY_LENGTH(methodname); | |
376 | |||
377 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
194 | PetscFunctionBegin; |
378 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
194 | PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii)); |
379 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
194 | if (isascii) { |
380 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
194 | if (fn->beta==(PetscScalar)1.0) { |
381 |
5/8✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
|
18 | if (fn->alpha==(PetscScalar)1.0) PetscCall(PetscViewerASCIIPrintf(viewer," square root: sqrt(x)\n")); |
382 | else { | ||
383 | ✗ | PetscCall(SlepcSNPrintfScalar(str,sizeof(str),fn->alpha,PETSC_TRUE)); | |
384 | ✗ | PetscCall(PetscViewerASCIIPrintf(viewer," square root: sqrt(%s*x)\n",str)); | |
385 | } | ||
386 | } else { | ||
387 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
176 | PetscCall(SlepcSNPrintfScalar(str,sizeof(str),fn->beta,PETSC_TRUE)); |
388 |
1/8✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
|
176 | if (fn->alpha==(PetscScalar)1.0) PetscCall(PetscViewerASCIIPrintf(viewer," square root: %s*sqrt(x)\n",str)); |
389 | else { | ||
390 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
176 | PetscCall(PetscViewerASCIIPrintf(viewer," square root: %s",str)); |
391 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
176 | PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_FALSE)); |
392 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
176 | PetscCall(SlepcSNPrintfScalar(str,sizeof(str),fn->alpha,PETSC_TRUE)); |
393 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
176 | PetscCall(PetscViewerASCIIPrintf(viewer,"*sqrt(%s*x)\n",str)); |
394 |
4/6✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
|
176 | PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_TRUE)); |
395 | } | ||
396 | } | ||
397 |
5/8✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
|
194 | if (fn->method<nmeth) PetscCall(PetscViewerASCIIPrintf(viewer," computing matrix functions with: %s\n",methodname[fn->method])); |
398 | } | ||
399 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
31 | PetscFunctionReturn(PETSC_SUCCESS); |
400 | } | ||
401 | |||
402 | 368 | SLEPC_EXTERN PetscErrorCode FNCreate_Sqrt(FN fn) | |
403 | { | ||
404 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
368 | PetscFunctionBegin; |
405 | 368 | fn->ops->evaluatefunction = FNEvaluateFunction_Sqrt; | |
406 | 368 | fn->ops->evaluatederivative = FNEvaluateDerivative_Sqrt; | |
407 | 368 | fn->ops->evaluatefunctionmat[0] = FNEvaluateFunctionMat_Sqrt_Schur; | |
408 | 368 | fn->ops->evaluatefunctionmat[1] = FNEvaluateFunctionMat_Sqrt_DBP; | |
409 | 368 | fn->ops->evaluatefunctionmat[2] = FNEvaluateFunctionMat_Sqrt_NS; | |
410 | 368 | fn->ops->evaluatefunctionmat[3] = FNEvaluateFunctionMat_Sqrt_Sadeghi; | |
411 | #if defined(PETSC_HAVE_CUDA) | ||
412 | 82 | fn->ops->evaluatefunctionmatcuda[2] = FNEvaluateFunctionMat_Sqrt_NS_CUDA; | |
413 | #if defined(PETSC_HAVE_MAGMA) | ||
414 | 82 | fn->ops->evaluatefunctionmatcuda[1] = FNEvaluateFunctionMat_Sqrt_DBP_CUDAm; | |
415 | 82 | fn->ops->evaluatefunctionmatcuda[3] = FNEvaluateFunctionMat_Sqrt_Sadeghi_CUDAm; | |
416 | #endif /* PETSC_HAVE_MAGMA */ | ||
417 | #endif /* PETSC_HAVE_CUDA */ | ||
418 | 368 | fn->ops->evaluatefunctionmatvec[0] = FNEvaluateFunctionMatVec_Sqrt_Schur; | |
419 | 368 | fn->ops->view = FNView_Sqrt; | |
420 |
6/12✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
|
368 | PetscFunctionReturn(PETSC_SUCCESS); |
421 | } | ||
422 |