GCC Code Coverage Report


Directory: ./
File: src/sys/classes/fn/impls/sqrt/fnsqrt.c
Date: 2025-10-04 04:19:13
Exec Total Coverage
Lines: 255 257 99.2%
Functions: 14 14 100.0%
Branches: 496 1020 48.6%

Line Branch Exec Source
1 /*
2 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3 SLEPc - Scalable Library for Eigenvalue Problem Computations
4 Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
5
6 This file is part of SLEPc.
7 SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9 */
10 /*
11 Square root function sqrt(x)
12 */
13
14 #include <slepc/private/fnimpl.h> /*I "slepcfn.h" I*/
15 #include <slepcblaslapack.h>
16
17 12077 static PetscErrorCode FNEvaluateFunction_Sqrt(FN fn,PetscScalar x,PetscScalar *y)
18 {
19
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
12077 PetscFunctionBegin;
20 #if !defined(PETSC_USE_COMPLEX)
21
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
4902 PetscCheck(x>=0.0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Function not defined in the requested value");
22 #endif
23 12077 *y = PetscSqrtScalar(x);
24
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
12077 PetscFunctionReturn(PETSC_SUCCESS);
25 }
26
27 690 static PetscErrorCode FNEvaluateDerivative_Sqrt(FN fn,PetscScalar x,PetscScalar *y)
28 {
29
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
690 PetscFunctionBegin;
30
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
690 PetscCheck(x!=0.0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Derivative not defined in the requested value");
31 #if !defined(PETSC_USE_COMPLEX)
32
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
140 PetscCheck(x>0.0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Derivative not defined in the requested value");
33 #endif
34 690 *y = 1.0/(2.0*PetscSqrtScalar(x));
35
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
690 PetscFunctionReturn(PETSC_SUCCESS);
36 }
37
38 206 static PetscErrorCode FNEvaluateFunctionMat_Sqrt_Schur(FN fn,Mat A,Mat B)
39 {
40 206 PetscBLASInt n=0;
41 206 PetscScalar *T;
42 206 PetscInt m;
43
44
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
206 PetscFunctionBegin;
45
5/8
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
206 if (A!=B) PetscCall(MatCopy(A,B,SAME_NONZERO_PATTERN));
46
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
206 PetscCall(MatDenseGetArray(B,&T));
47
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
206 PetscCall(MatGetSize(A,&m,NULL));
48
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
206 PetscCall(PetscBLASIntCast(m,&n));
49
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
206 PetscCall(FNSqrtmSchur(fn,n,T,n,PETSC_FALSE));
50
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
206 PetscCall(MatDenseRestoreArray(B,&T));
51
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
40 PetscFunctionReturn(PETSC_SUCCESS);
52 }
53
54 174 static PetscErrorCode FNEvaluateFunctionMatVec_Sqrt_Schur(FN fn,Mat A,Vec v)
55 {
56 174 PetscBLASInt n=0;
57 174 PetscScalar *T;
58 174 PetscInt m;
59 174 Mat B;
60
61
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
174 PetscFunctionBegin;
62
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
174 PetscCall(FN_AllocateWorkMat(fn,A,&B));
63
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
174 PetscCall(MatDenseGetArray(B,&T));
64
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
174 PetscCall(MatGetSize(A,&m,NULL));
65
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
174 PetscCall(PetscBLASIntCast(m,&n));
66
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
174 PetscCall(FNSqrtmSchur(fn,n,T,n,PETSC_TRUE));
67
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
174 PetscCall(MatDenseRestoreArray(B,&T));
68
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
174 PetscCall(MatGetColumnVector(B,v,0));
69
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
174 PetscCall(FN_FreeWorkMat(fn,&B));
70
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
30 PetscFunctionReturn(PETSC_SUCCESS);
71 }
72
73 126 static PetscErrorCode FNEvaluateFunctionMat_Sqrt_DBP(FN fn,Mat A,Mat B)
74 {
75 126 PetscBLASInt n=0;
76 126 PetscScalar *T;
77 126 PetscInt m;
78
79
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
126 PetscFunctionBegin;
80
5/8
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
126 if (A!=B) PetscCall(MatCopy(A,B,SAME_NONZERO_PATTERN));
81
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
126 PetscCall(MatDenseGetArray(B,&T));
82
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
126 PetscCall(MatGetSize(A,&m,NULL));
83
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
126 PetscCall(PetscBLASIntCast(m,&n));
84
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
126 PetscCall(FNSqrtmDenmanBeavers(fn,n,T,n,PETSC_FALSE));
85
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
126 PetscCall(MatDenseRestoreArray(B,&T));
86
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
24 PetscFunctionReturn(PETSC_SUCCESS);
87 }
88
89 126 static PetscErrorCode FNEvaluateFunctionMat_Sqrt_NS(FN fn,Mat A,Mat B)
90 {
91 126 PetscBLASInt n=0;
92 126 PetscScalar *Ba;
93 126 PetscInt m;
94
95
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
126 PetscFunctionBegin;
96
5/8
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
126 if (A!=B) PetscCall(MatCopy(A,B,SAME_NONZERO_PATTERN));
97
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
126 PetscCall(MatDenseGetArray(B,&Ba));
98
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
126 PetscCall(MatGetSize(A,&m,NULL));
99
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
126 PetscCall(PetscBLASIntCast(m,&n));
100
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
126 PetscCall(FNSqrtmNewtonSchulz(fn,n,Ba,n,PETSC_FALSE));
101
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
126 PetscCall(MatDenseRestoreArray(B,&Ba));
102
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
24 PetscFunctionReturn(PETSC_SUCCESS);
103 }
104
105 #define MAXIT 50
106
107 /*
108 Computes the principal square root of the matrix A using the
109 Sadeghi iteration. A is overwritten with sqrtm(A).
110 */
111 246 PetscErrorCode FNSqrtmSadeghi(FN fn,PetscBLASInt n,PetscScalar *A,PetscBLASInt ld)
112 {
113 246 PetscScalar *M,*M2,*G,*X=A,*work,work1,sqrtnrm;
114 246 PetscScalar szero=0.0,sone=1.0,smfive=-5.0,s1d16=1.0/16.0;
115 246 PetscReal tol,Mres=0.0,nrm,rwork[1],done=1.0;
116 246 PetscInt i,it;
117 246 PetscBLASInt N,*piv=NULL,info,lwork=0,query=-1,one=1,zero=0;
118 246 PetscBool converged=PETSC_FALSE;
119 246 unsigned int ftz;
120
121
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
246 PetscFunctionBegin;
122 246 N = n*n;
123 246 tol = PetscSqrtReal((PetscReal)n)*PETSC_MACHINE_EPSILON/2;
124
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
246 PetscCall(SlepcSetFlushToZero(&ftz));
125
126 /* query work size */
127
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
246 PetscCallBLAS("LAPACKgetri",LAPACKgetri_(&n,A,&ld,piv,&work1,&query,&info));
128
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
246 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(work1),&lwork));
129
130
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
246 PetscCall(PetscMalloc5(N,&M,N,&M2,N,&G,lwork,&work,n,&piv));
131
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
246 PetscCall(PetscArraycpy(M,A,N));
132
133 /* scale M */
134 246 nrm = LAPACKlange_("fro",&n,&n,M,&n,rwork);
135
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
246 if (nrm>1.0) {
136 246 sqrtnrm = PetscSqrtReal(nrm);
137
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
246 PetscCallBLAS("LAPACKlascl",LAPACKlascl_("G",&zero,&zero,&nrm,&done,&N,&one,M,&N,&info));
138
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
246 SlepcCheckLapackInfo("lascl",info);
139 246 tol *= nrm;
140 }
141
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
246 PetscCall(PetscInfo(fn,"||A||_F = %g, new tol: %g\n",(double)nrm,(double)tol));
142
143 /* X = I */
144
4/6
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
246 PetscCall(PetscArrayzero(X,N));
145
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
13560 for (i=0;i<n;i++) X[i+i*ld] = 1.0;
146
147
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
1294 for (it=0;it<MAXIT && !converged;it++) {
148
149 /* G = (5/16)*I + (1/16)*M*(15*I-5*M+M*M) */
150
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
1048 PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&sone,M,&ld,M,&ld,&szero,M2,&ld));
151
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
1048 PetscCallBLAS("BLASaxpy",BLASaxpy_(&N,&smfive,M,&one,M2,&one));
152
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
63656 for (i=0;i<n;i++) M2[i+i*ld] += 15.0;
153
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
1048 PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&s1d16,M,&ld,M2,&ld,&szero,G,&ld));
154
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
63656 for (i=0;i<n;i++) G[i+i*ld] += 5.0/16.0;
155
156 /* X = X*G */
157
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
1048 PetscCall(PetscArraycpy(M2,X,N));
158
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
1048 PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&sone,M2,&ld,G,&ld,&szero,X,&ld));
159
160 /* M = M*inv(G*G) */
161
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
1048 PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&sone,G,&ld,G,&ld,&szero,M2,&ld));
162
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
1048 PetscCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,M2,&ld,piv,&info));
163
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
1048 SlepcCheckLapackInfo("getrf",info);
164
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
1048 PetscCallBLAS("LAPACKgetri",LAPACKgetri_(&n,M2,&ld,piv,work,&lwork,&info));
165
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
1048 SlepcCheckLapackInfo("getri",info);
166
167
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
1048 PetscCall(PetscArraycpy(G,M,N));
168
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
1048 PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&sone,G,&ld,M2,&ld,&szero,M,&ld));
169
170 /* check ||I-M|| */
171
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
1048 PetscCall(PetscArraycpy(M2,M,N));
172
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
63656 for (i=0;i<n;i++) M2[i+i*ld] -= 1.0;
173 1048 Mres = LAPACKlange_("fro",&n,&n,M2,&n,rwork);
174
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
1048 PetscCheck(!PetscIsNanReal(Mres),PETSC_COMM_SELF,PETSC_ERR_FP,"The computed norm is not-a-number");
175
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
1048 if (Mres<=tol) converged = PETSC_TRUE;
176
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
1048 PetscCall(PetscInfo(fn,"it: %" PetscInt_FMT " res: %g\n",it,(double)Mres));
177
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
1048 PetscCall(PetscLogFlops(8.0*n*n*n+2.0*n*n+2.0*n*n*n/3.0+4.0*n*n*n/3.0+2.0*n*n*n+2.0*n*n));
178 }
179
180
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
246 PetscCheck(Mres<=tol,PETSC_COMM_SELF,PETSC_ERR_LIB,"SQRTM not converged after %d iterations",MAXIT);
181
182 /* undo scaling */
183
11/22
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✓ Branch 9 taken 2 times.
✓ Branch 10 taken 2 times.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✓ Branch 13 taken 2 times.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✓ Branch 16 taken 2 times.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✓ Branch 19 taken 2 times.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
246 if (nrm>1.0) PetscCallBLAS("BLASscal",BLASscal_(&N,&sqrtnrm,A,&one));
184
185
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
246 PetscCall(PetscFree5(M,M2,G,work,piv));
186
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
73 PetscCall(SlepcResetFlushToZero(&ftz));
187
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
48 PetscFunctionReturn(PETSC_SUCCESS);
188 }
189
190 #if defined(PETSC_HAVE_CUDA)
191 #include "../src/sys/classes/fn/impls/cuda/fnutilcuda.h"
192 #include <slepccupmblas.h>
193
194 #if defined(PETSC_HAVE_MAGMA)
195 #include <slepcmagma.h>
196
197 /*
198 * Matrix square root by Sadeghi iteration. CUDA version.
199 * Computes the principal square root of the matrix A using the
200 * Sadeghi iteration. A is overwritten with sqrtm(A).
201 */
202 48 PetscErrorCode FNSqrtmSadeghi_CUDAm(FN fn,PetscBLASInt n,PetscScalar *d_A,PetscBLASInt ld)
203 {
204 48 PetscScalar *d_M,*d_M2,*d_G,*d_work,alpha;
205 48 const PetscScalar szero=0.0,sone=1.0,smfive=-5.0,s15=15.0,s1d16=1.0/16.0;
206 48 PetscReal tol,Mres=0.0,nrm,sqrtnrm=1.0;
207 48 PetscInt it,nb,lwork;
208 48 PetscBLASInt *piv,N;
209 48 const PetscBLASInt one=1;
210 48 PetscBool converged=PETSC_FALSE;
211 48 cublasHandle_t cublasv2handle;
212
213 48 PetscFunctionBegin;
214
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
48 PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA)); /* For CUDA event timers */
215
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
48 PetscCall(PetscCUBLASGetHandle(&cublasv2handle));
216
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
48 PetscCall(SlepcMagmaInit());
217 48 N = n*n;
218 48 tol = PetscSqrtReal((PetscReal)n)*PETSC_MACHINE_EPSILON/2;
219
220
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
48 PetscCall(PetscMalloc1(n,&piv));
221
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
48 PetscCallCUDA(cudaMalloc((void **)&d_M,sizeof(PetscScalar)*N));
222
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
48 PetscCallCUDA(cudaMalloc((void **)&d_M2,sizeof(PetscScalar)*N));
223
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
48 PetscCallCUDA(cudaMalloc((void **)&d_G,sizeof(PetscScalar)*N));
224
225 48 nb = magma_get_xgetri_nb(n);
226 48 lwork = nb*n;
227
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
48 PetscCallCUDA(cudaMalloc((void **)&d_work,sizeof(PetscScalar)*lwork));
228
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
48 PetscCall(PetscLogGpuTimeBegin());
229
230 /* M = A */
231
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
48 PetscCallCUDA(cudaMemcpy(d_M,d_A,sizeof(PetscScalar)*N,cudaMemcpyDeviceToDevice));
232
233 /* scale M */
234
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
48 PetscCallCUBLAS(cublasXnrm2(cublasv2handle,N,d_M,one,&nrm));
235
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
48 if (nrm>1.0) {
236 48 sqrtnrm = PetscSqrtReal(nrm);
237 48 alpha = 1.0/nrm;
238
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
48 PetscCallCUBLAS(cublasXscal(cublasv2handle,N,&alpha,d_M,one));
239 48 tol *= nrm;
240 }
241
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
48 PetscCall(PetscInfo(fn,"||A||_F = %g, new tol: %g\n",(double)nrm,(double)tol));
242
243 /* X = I */
244
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
48 PetscCallCUDA(cudaMemset(d_A,0,sizeof(PetscScalar)*N));
245
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
48 PetscCall(set_diagonal(n,d_A,ld,sone));
246
247
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
248 for (it=0;it<MAXIT && !converged;it++) {
248
249 /* G = (5/16)*I + (1/16)*M*(15*I-5*M+M*M) */
250
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
200 PetscCallCUBLAS(cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n,n,n,&sone,d_M,ld,d_M,ld,&szero,d_M2,ld));
251
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
200 PetscCallCUBLAS(cublasXaxpy(cublasv2handle,N,&smfive,d_M,one,d_M2,one));
252
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
200 PetscCall(shift_diagonal(n,d_M2,ld,s15));
253
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
200 PetscCallCUBLAS(cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n,n,n,&s1d16,d_M,ld,d_M2,ld,&szero,d_G,ld));
254
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
200 PetscCall(shift_diagonal(n,d_G,ld,5.0/16.0));
255
256 /* X = X*G */
257
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
200 PetscCallCUDA(cudaMemcpy(d_M2,d_A,sizeof(PetscScalar)*N,cudaMemcpyDeviceToDevice));
258
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
200 PetscCallCUBLAS(cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n,n,n,&sone,d_M2,ld,d_G,ld,&szero,d_A,ld));
259
260 /* M = M*inv(G*G) */
261
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
200 PetscCallCUBLAS(cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n,n,n,&sone,d_G,ld,d_G,ld,&szero,d_M2,ld));
262 /* magma */
263
2/6
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
200 PetscCallMAGMA(magma_xgetrf_gpu,n,n,d_M2,ld,piv);
264
2/6
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
200 PetscCallMAGMA(magma_xgetri_gpu,n,d_M2,ld,piv,d_work,lwork);
265 /* magma */
266
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
200 PetscCallCUDA(cudaMemcpy(d_G,d_M,sizeof(PetscScalar)*N,cudaMemcpyDeviceToDevice));
267
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
200 PetscCallCUBLAS(cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n,n,n,&sone,d_G,ld,d_M2,ld,&szero,d_M,ld));
268
269 /* check ||I-M|| */
270
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
200 PetscCallCUDA(cudaMemcpy(d_M2,d_M,sizeof(PetscScalar)*N,cudaMemcpyDeviceToDevice));
271
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
200 PetscCall(shift_diagonal(n,d_M2,ld,-1.0));
272
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
200 PetscCallCUBLAS(cublasXnrm2(cublasv2handle,N,d_M2,one,&Mres));
273
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
200 PetscCheck(!PetscIsNanReal(Mres),PETSC_COMM_SELF,PETSC_ERR_FP,"The computed norm is not-a-number");
274
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
200 if (Mres<=tol) converged = PETSC_TRUE;
275
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
200 PetscCall(PetscInfo(fn,"it: %" PetscInt_FMT " res: %g\n",it,(double)Mres));
276
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
200 PetscCall(PetscLogGpuFlops(8.0*n*n*n+2.0*n*n+2.0*n*n*n/3.0+4.0*n*n*n/3.0+2.0*n*n*n+2.0*n*n));
277 }
278
279
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
48 PetscCheck(Mres<=tol,PETSC_COMM_SELF,PETSC_ERR_LIB,"SQRTM not converged after %d iterations", MAXIT);
280
281
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
48 if (nrm>1.0) {
282 48 alpha = sqrtnrm;
283
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
48 PetscCallCUBLAS(cublasXscal(cublasv2handle,N,&alpha,d_A,one));
284 }
285
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
48 PetscCall(PetscLogGpuTimeEnd());
286
287
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
48 PetscCallCUDA(cudaFree(d_M));
288
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
48 PetscCallCUDA(cudaFree(d_M2));
289
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
48 PetscCallCUDA(cudaFree(d_G));
290
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
48 PetscCallCUDA(cudaFree(d_work));
291
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
48 PetscCall(PetscFree(piv));
292 PetscFunctionReturn(PETSC_SUCCESS);
293 }
294 #endif /* PETSC_HAVE_MAGMA */
295 #endif /* PETSC_HAVE_CUDA */
296
297 126 static PetscErrorCode FNEvaluateFunctionMat_Sqrt_Sadeghi(FN fn,Mat A,Mat B)
298 {
299 126 PetscBLASInt n=0;
300 126 PetscScalar *Ba;
301 126 PetscInt m;
302
303
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
126 PetscFunctionBegin;
304
5/8
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
126 if (A!=B) PetscCall(MatCopy(A,B,SAME_NONZERO_PATTERN));
305
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
126 PetscCall(MatDenseGetArray(B,&Ba));
306
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
126 PetscCall(MatGetSize(A,&m,NULL));
307
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
126 PetscCall(PetscBLASIntCast(m,&n));
308
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
126 PetscCall(FNSqrtmSadeghi(fn,n,Ba,n));
309
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
126 PetscCall(MatDenseRestoreArray(B,&Ba));
310
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
24 PetscFunctionReturn(PETSC_SUCCESS);
311 }
312
313 #if defined(PETSC_HAVE_CUDA)
314 24 PetscErrorCode FNEvaluateFunctionMat_Sqrt_NS_CUDA(FN fn,Mat A,Mat B)
315 {
316 24 PetscBLASInt n=0;
317 24 PetscScalar *Ba;
318 24 PetscInt m;
319
320 24 PetscFunctionBegin;
321
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
24 if (A!=B) PetscCall(MatCopy(A,B,SAME_NONZERO_PATTERN));
322
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
24 PetscCall(MatDenseCUDAGetArray(B,&Ba));
323
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
24 PetscCall(MatGetSize(A,&m,NULL));
324
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
24 PetscCall(PetscBLASIntCast(m,&n));
325
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
24 PetscCall(FNSqrtmNewtonSchulz_CUDA(fn,n,Ba,n,PETSC_FALSE));
326
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
24 PetscCall(MatDenseCUDARestoreArray(B,&Ba));
327 PetscFunctionReturn(PETSC_SUCCESS);
328 }
329
330 #if defined(PETSC_HAVE_MAGMA)
331 24 PetscErrorCode FNEvaluateFunctionMat_Sqrt_DBP_CUDAm(FN fn,Mat A,Mat B)
332 {
333 24 PetscBLASInt n=0;
334 24 PetscScalar *T;
335 24 PetscInt m;
336
337 24 PetscFunctionBegin;
338
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
24 if (A!=B) PetscCall(MatCopy(A,B,SAME_NONZERO_PATTERN));
339
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
24 PetscCall(MatDenseCUDAGetArray(B,&T));
340
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
24 PetscCall(MatGetSize(A,&m,NULL));
341
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
24 PetscCall(PetscBLASIntCast(m,&n));
342
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
24 PetscCall(FNSqrtmDenmanBeavers_CUDAm(fn,n,T,n,PETSC_FALSE));
343
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
24 PetscCall(MatDenseCUDARestoreArray(B,&T));
344 PetscFunctionReturn(PETSC_SUCCESS);
345 }
346
347 24 PetscErrorCode FNEvaluateFunctionMat_Sqrt_Sadeghi_CUDAm(FN fn,Mat A,Mat B)
348 {
349 24 PetscBLASInt n=0;
350 24 PetscScalar *Ba;
351 24 PetscInt m;
352
353 24 PetscFunctionBegin;
354
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
24 if (A!=B) PetscCall(MatCopy(A,B,SAME_NONZERO_PATTERN));
355
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
24 PetscCall(MatDenseCUDAGetArray(B,&Ba));
356
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
24 PetscCall(MatGetSize(A,&m,NULL));
357
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
24 PetscCall(PetscBLASIntCast(m,&n));
358
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
24 PetscCall(FNSqrtmSadeghi_CUDAm(fn,n,Ba,n));
359
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
24 PetscCall(MatDenseCUDARestoreArray(B,&Ba));
360 PetscFunctionReturn(PETSC_SUCCESS);
361 }
362 #endif /* PETSC_HAVE_MAGMA */
363 #endif /* PETSC_HAVE_CUDA */
364
365 194 static PetscErrorCode FNView_Sqrt(FN fn,PetscViewer viewer)
366 {
367 194 PetscBool isascii;
368 194 char str[50];
369 194 const char *methodname[] = {
370 "Schur method for the square root",
371 "Denman-Beavers (product form)",
372 "Newton-Schulz iteration",
373 "Sadeghi iteration"
374 };
375 194 const int nmeth=PETSC_STATIC_ARRAY_LENGTH(methodname);
376
377
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
194 PetscFunctionBegin;
378
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
194 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii));
379
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
194 if (isascii) {
380
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
194 if (fn->beta==(PetscScalar)1.0) {
381
5/8
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
18 if (fn->alpha==(PetscScalar)1.0) PetscCall(PetscViewerASCIIPrintf(viewer," square root: sqrt(x)\n"));
382 else {
383 PetscCall(SlepcSNPrintfScalar(str,sizeof(str),fn->alpha,PETSC_TRUE));
384 PetscCall(PetscViewerASCIIPrintf(viewer," square root: sqrt(%s*x)\n",str));
385 }
386 } else {
387
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
176 PetscCall(SlepcSNPrintfScalar(str,sizeof(str),fn->beta,PETSC_TRUE));
388
1/8
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
176 if (fn->alpha==(PetscScalar)1.0) PetscCall(PetscViewerASCIIPrintf(viewer," square root: %s*sqrt(x)\n",str));
389 else {
390
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
176 PetscCall(PetscViewerASCIIPrintf(viewer," square root: %s",str));
391
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
176 PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_FALSE));
392
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
176 PetscCall(SlepcSNPrintfScalar(str,sizeof(str),fn->alpha,PETSC_TRUE));
393
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
176 PetscCall(PetscViewerASCIIPrintf(viewer,"*sqrt(%s*x)\n",str));
394
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
176 PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_TRUE));
395 }
396 }
397
5/8
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
194 if (fn->method<nmeth) PetscCall(PetscViewerASCIIPrintf(viewer," computing matrix functions with: %s\n",methodname[fn->method]));
398 }
399
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
31 PetscFunctionReturn(PETSC_SUCCESS);
400 }
401
402 368 SLEPC_EXTERN PetscErrorCode FNCreate_Sqrt(FN fn)
403 {
404
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
368 PetscFunctionBegin;
405 368 fn->ops->evaluatefunction = FNEvaluateFunction_Sqrt;
406 368 fn->ops->evaluatederivative = FNEvaluateDerivative_Sqrt;
407 368 fn->ops->evaluatefunctionmat[0] = FNEvaluateFunctionMat_Sqrt_Schur;
408 368 fn->ops->evaluatefunctionmat[1] = FNEvaluateFunctionMat_Sqrt_DBP;
409 368 fn->ops->evaluatefunctionmat[2] = FNEvaluateFunctionMat_Sqrt_NS;
410 368 fn->ops->evaluatefunctionmat[3] = FNEvaluateFunctionMat_Sqrt_Sadeghi;
411 #if defined(PETSC_HAVE_CUDA)
412 82 fn->ops->evaluatefunctionmatcuda[2] = FNEvaluateFunctionMat_Sqrt_NS_CUDA;
413 #if defined(PETSC_HAVE_MAGMA)
414 82 fn->ops->evaluatefunctionmatcuda[1] = FNEvaluateFunctionMat_Sqrt_DBP_CUDAm;
415 82 fn->ops->evaluatefunctionmatcuda[3] = FNEvaluateFunctionMat_Sqrt_Sadeghi_CUDAm;
416 #endif /* PETSC_HAVE_MAGMA */
417 #endif /* PETSC_HAVE_CUDA */
418 368 fn->ops->evaluatefunctionmatvec[0] = FNEvaluateFunctionMatVec_Sqrt_Schur;
419 368 fn->ops->view = FNView_Sqrt;
420
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
368 PetscFunctionReturn(PETSC_SUCCESS);
421 }
422