GCC Code Coverage Report


Directory: ./
File: src/sys/classes/fn/impls/exp/fnexp.c
Date: 2025-10-03 04:28:47
Exec Total Coverage
Lines: 985 1206 81.7%
Functions: 16 17 94.1%
Branches: 1801 4442 40.5%

Line Branch Exec Source
1 /*
2 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3 SLEPc - Scalable Library for Eigenvalue Problem Computations
4 Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
5
6 This file is part of SLEPc.
7 SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9 */
10 /*
11 Exponential function exp(x)
12 */
13
14 #include <slepc/private/fnimpl.h> /*I "slepcfn.h" I*/
15 #include <slepcblaslapack.h>
16
17 28238 static PetscErrorCode FNEvaluateFunction_Exp(FN fn,PetscScalar x,PetscScalar *y)
18 {
19
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
28238 PetscFunctionBegin;
20 28238 *y = PetscExpScalar(x);
21
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
28238 PetscFunctionReturn(PETSC_SUCCESS);
22 }
23
24 8907 static PetscErrorCode FNEvaluateDerivative_Exp(FN fn,PetscScalar x,PetscScalar *y)
25 {
26
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
8907 PetscFunctionBegin;
27 8907 *y = PetscExpScalar(x);
28
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
8907 PetscFunctionReturn(PETSC_SUCCESS);
29 }
30
31 #define MAX_PADE 6
32
33 160 static PetscErrorCode FNEvaluateFunctionMat_Exp_Pade(FN fn,Mat A,Mat B)
34 {
35 160 PetscBLASInt n=0,ld,ld2,*ipiv,info,inc=1;
36 160 PetscInt m,j,k,sexp;
37 160 PetscBool odd;
38 160 const PetscInt p=MAX_PADE;
39 160 PetscReal c[MAX_PADE+1],s,*rwork;
40 160 PetscScalar scale,mone=-1.0,one=1.0,two=2.0,zero=0.0;
41 160 PetscScalar *Ba,*As,*A2,*Q,*P,*W,*aux;
42 160 const PetscScalar *Aa;
43
44
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
160 PetscFunctionBegin;
45
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
160 PetscCall(MatDenseGetArrayRead(A,&Aa));
46
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
160 PetscCall(MatDenseGetArray(B,&Ba));
47
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
160 PetscCall(MatGetSize(A,&m,NULL));
48
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
160 PetscCall(PetscBLASIntCast(m,&n));
49 160 ld = n;
50 160 ld2 = ld*ld;
51 160 P = Ba;
52
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
160 PetscCall(PetscMalloc6(m*m,&Q,m*m,&W,m*m,&As,m*m,&A2,ld,&rwork,ld,&ipiv));
53
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
160 PetscCall(PetscArraycpy(As,Aa,ld2));
54
55 /* Pade' coefficients */
56 160 c[0] = 1.0;
57
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
1120 for (k=1;k<=p;k++) c[k] = c[k-1]*(p+1-k)/(k*(2*p+1-k));
58
59 /* Scaling */
60 160 s = LAPACKlange_("I",&n,&n,As,&ld,rwork);
61
3/6
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
160 PetscCall(PetscLogFlops(1.0*n*n));
62
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
160 if (s>0.5) {
63
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
160 sexp = PetscMax(0,(int)(PetscLogReal(s)/PetscLogReal(2.0))+2);
64 160 scale = PetscPowRealInt(2.0,-sexp);
65
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
160 PetscCallBLAS("BLASscal",BLASscal_(&ld2,&scale,As,&inc));
66
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
160 PetscCall(PetscLogFlops(1.0*n*n));
67 } else sexp = 0;
68
69 /* Horner evaluation */
70
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
160 PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,As,&ld,As,&ld,&zero,A2,&ld));
71
3/6
✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
160 PetscCall(PetscLogFlops(2.0*n*n*n));
72
4/6
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
160 PetscCall(PetscArrayzero(Q,ld2));
73
4/6
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
160 PetscCall(PetscArrayzero(P,ld2));
74
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
6160 for (j=0;j<n;j++) {
75 6000 Q[j+j*ld] = c[p];
76 6000 P[j+j*ld] = c[p-1];
77 }
78
79 odd = PETSC_TRUE;
80
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
960 for (k=p-1;k>0;k--) {
81
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
800 if (odd) {
82
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
480 PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,Q,&ld,A2,&ld,&zero,W,&ld));
83 480 SlepcSwap(Q,W,aux);
84
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
18480 for (j=0;j<n;j++) Q[j+j*ld] += c[k-1];
85 odd = PETSC_FALSE;
86 } else {
87
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
320 PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,P,&ld,A2,&ld,&zero,W,&ld));
88 320 SlepcSwap(P,W,aux);
89
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
12320 for (j=0;j<n;j++) P[j+j*ld] += c[k-1];
90 odd = PETSC_TRUE;
91 }
92
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
800 PetscCall(PetscLogFlops(2.0*n*n*n));
93 }
94 /*if (odd) {
95 PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,Q,&ld,As,&ld,&zero,W,&ld));
96 SlepcSwap(Q,W,aux);
97 PetscCallBLAS("BLASaxpy",BLASaxpy_(&ld2,&mone,P,&inc,Q,&inc));
98 PetscCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&n,Q,&ld,ipiv,P,&ld,&info));
99 SlepcCheckLapackInfo("gesv",info);
100 PetscCallBLAS("BLASscal",BLASscal_(&ld2,&two,P,&inc));
101 for (j=0;j<n;j++) P[j+j*ld] += 1.0;
102 PetscCallBLAS("BLASscal",BLASscal_(&ld2,&mone,P,&inc));
103 } else {*/
104
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
160 PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,P,&ld,As,&ld,&zero,W,&ld));
105 160 SlepcSwap(P,W,aux);
106
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
160 PetscCallBLAS("BLASaxpy",BLASaxpy_(&ld2,&mone,P,&inc,Q,&inc));
107
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
160 PetscCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&n,Q,&ld,ipiv,P,&ld,&info));
108
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
160 SlepcCheckLapackInfo("gesv",info);
109
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
160 PetscCallBLAS("BLASscal",BLASscal_(&ld2,&two,P,&inc));
110
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
6160 for (j=0;j<n;j++) P[j+j*ld] += 1.0;
111 /*}*/
112
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
160 PetscCall(PetscLogFlops(2.0*n*n*n+2.0*n*n*n/3.0+4.0*n*n));
113
114
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
680 for (k=1;k<=sexp;k++) {
115
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
520 PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,P,&ld,P,&ld,&zero,W,&ld));
116
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
520 PetscCall(PetscArraycpy(P,W,ld2));
117 }
118
1/8
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
160 if (P!=Ba) PetscCall(PetscArraycpy(Ba,P,ld2));
119
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
160 PetscCall(PetscLogFlops(2.0*n*n*n*sexp));
120
121
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
160 PetscCall(PetscFree6(Q,W,As,A2,rwork,ipiv));
122
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
160 PetscCall(MatDenseRestoreArrayRead(A,&Aa));
123
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
160 PetscCall(MatDenseRestoreArray(B,&Ba));
124
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
32 PetscFunctionReturn(PETSC_SUCCESS);
125 }
126
127 #if defined(PETSC_HAVE_COMPLEX)
128 /*
129 * Set scaling factor (s) and Pade degree (k,m)
130 */
131 512 static PetscErrorCode sexpm_params(PetscReal nrm,PetscInt *s,PetscInt *k,PetscInt *m)
132 {
133
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
512 PetscFunctionBegin;
134
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
512 if (nrm>1) {
135
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
352 if (nrm<200) {*s = 4; *k = 5; *m = *k-1;}
136
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
40 else if (nrm<1e4) {*s = 4; *k = 4; *m = *k+1;}
137 else if (nrm<1e6) {*s = 4; *k = 3; *m = *k+1;}
138 else if (nrm<1e9) {*s = 3; *k = 3; *m = *k+1;}
139 else if (nrm<1e11) {*s = 2; *k = 3; *m = *k+1;}
140 else if (nrm<1e12) {*s = 2; *k = 2; *m = *k+1;}
141 else if (nrm<1e14) {*s = 2; *k = 1; *m = *k+1;}
142 else {*s = 1; *k = 1; *m = *k+1;}
143 } else { /* nrm<1 */
144
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
160 if (nrm>0.5) {*s = 4; *k = 4; *m = *k-1;}
145
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
120 else if (nrm>0.3) {*s = 3; *k = 4; *m = *k-1;}
146
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
80 else if (nrm>0.15) {*s = 2; *k = 4; *m = *k-1;}
147
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
80 else if (nrm>0.07) {*s = 1; *k = 4; *m = *k-1;}
148
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
80 else if (nrm>0.01) {*s = 0; *k = 4; *m = *k-1;}
149
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
80 else if (nrm>3e-4) {*s = 0; *k = 3; *m = *k-1;}
150
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
80 else if (nrm>1e-5) {*s = 0; *k = 3; *m = 0;}
151
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
80 else if (nrm>1e-8) {*s = 0; *k = 2; *m = 0;}
152 80 else {*s = 0; *k = 1; *m = 0;}
153 }
154
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
512 PetscFunctionReturn(PETSC_SUCCESS);
155 }
156
157 /*
158 * Partial fraction form coefficients.
159 * If query, the function returns the size necessary to store the coefficients.
160 */
161 432 static PetscErrorCode getcoeffs(PetscInt k,PetscInt m,PetscComplex *r,PetscComplex *q,PetscComplex *remain,PetscBool query)
162 {
163 432 PetscInt i;
164 432 const PetscComplex /* m == k+1 */
165 432 p1r4[5] = {-1.582680186458572e+01 - 2.412564578224361e+01*PETSC_i,
166 432 -1.582680186458572e+01 + 2.412564578224361e+01*PETSC_i,
167 432 1.499984465975511e+02 + 6.804227952202417e+01*PETSC_i,
168 432 1.499984465975511e+02 - 6.804227952202417e+01*PETSC_i,
169 -2.733432894659307e+02 },
170 432 p1q4[5] = { 3.655694325463550e+00 + 6.543736899360086e+00*PETSC_i,
171 432 3.655694325463550e+00 - 6.543736899360086e+00*PETSC_i,
172 432 5.700953298671832e+00 + 3.210265600308496e+00*PETSC_i,
173 432 5.700953298671832e+00 - 3.210265600308496e+00*PETSC_i,
174 6.286704751729261e+00 },
175 432 p1r3[4] = {-1.130153999597152e+01 + 1.247167585025031e+01*PETSC_i,
176 432 -1.130153999597152e+01 - 1.247167585025031e+01*PETSC_i,
177 432 1.330153999597152e+01 - 6.007173273704750e+01*PETSC_i,
178 432 1.330153999597152e+01 + 6.007173273704750e+01*PETSC_i},
179 432 p1q3[4] = { 3.212806896871536e+00 + 4.773087433276636e+00*PETSC_i,
180 432 3.212806896871536e+00 - 4.773087433276636e+00*PETSC_i,
181 432 4.787193103128464e+00 + 1.567476416895212e+00*PETSC_i,
182 432 4.787193103128464e+00 - 1.567476416895212e+00*PETSC_i},
183 432 p1r2[3] = { 7.648749087422928e+00 + 4.171640244747463e+00*PETSC_i,
184 432 7.648749087422928e+00 - 4.171640244747463e+00*PETSC_i,
185 -1.829749817484586e+01 },
186 432 p1q2[3] = { 2.681082873627756e+00 + 3.050430199247411e+00*PETSC_i,
187 432 2.681082873627756e+00 - 3.050430199247411e+00*PETSC_i,
188 3.637834252744491e+00 },
189 432 p1r1[2] = { 1.000000000000000e+00 - 3.535533905932738e+00*PETSC_i,
190 432 1.000000000000000e+00 + 3.535533905932738e+00*PETSC_i},
191 432 p1q1[2] = { 2.000000000000000e+00 + 1.414213562373095e+00*PETSC_i,
192 432 2.000000000000000e+00 - 1.414213562373095e+00*PETSC_i};
193 432 const PetscComplex /* m == k-1 */
194 432 m1r5[4] = {-1.423367961376821e+02 - 1.385465094833037e+01*PETSC_i,
195 432 -1.423367961376821e+02 + 1.385465094833037e+01*PETSC_i,
196 432 2.647367961376822e+02 - 4.814394493714596e+02*PETSC_i,
197 432 2.647367961376822e+02 + 4.814394493714596e+02*PETSC_i},
198 432 m1q5[4] = { 5.203941240131764e+00 + 5.805856841805367e+00*PETSC_i,
199 432 5.203941240131764e+00 - 5.805856841805367e+00*PETSC_i,
200 432 6.796058759868242e+00 + 1.886649260140217e+00*PETSC_i,
201 432 6.796058759868242e+00 - 1.886649260140217e+00*PETSC_i},
202 432 m1r4[3] = { 2.484269593165883e+01 + 7.460342395992306e+01*PETSC_i,
203 432 2.484269593165883e+01 - 7.460342395992306e+01*PETSC_i,
204 -1.734353918633177e+02 },
205 432 m1q4[3] = { 4.675757014491557e+00 + 3.913489560603711e+00*PETSC_i,
206 432 4.675757014491557e+00 - 3.913489560603711e+00*PETSC_i,
207 5.648485971016893e+00 },
208 432 m1r3[2] = { 2.533333333333333e+01 - 2.733333333333333e+01*PETSC_i,
209 432 2.533333333333333e+01 + 2.733333333333333e+01*PETSC_i},
210 432 m1q3[2] = { 4.000000000000000e+00 + 2.000000000000000e+00*PETSC_i,
211 432 4.000000000000000e+00 - 2.000000000000000e+00*PETSC_i};
212 432 const PetscScalar /* m == k-1 */
213 432 m1remain5[2] = { 2.000000000000000e-01, 9.800000000000000e+00},
214 432 m1remain4[2] = {-2.500000000000000e-01, -7.750000000000000e+00},
215 432 m1remain3[2] = { 3.333333333333333e-01, 5.666666666666667e+00},
216 432 m1remain2[2] = {-0.5, -3.5},
217 432 remain3[4] = {1.0/6.0, 1.0/2.0, 1, 1},
218 432 remain2[3] = {1.0/2.0, 1, 1};
219
220
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
432 PetscFunctionBegin;
221
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
432 if (query) { /* query about buffer's size */
222
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
216 if (m==k+1) {
223 20 *remain = 0;
224 20 *r = *q = k+1;
225
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
20 PetscFunctionReturn(PETSC_SUCCESS); /* quick return */
226 }
227
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
196 if (m==k-1) {
228 196 *remain = 2;
229
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
196 if (k==5) *r = *q = 4;
230
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
40 else if (k==4) *r = *q = 3;
231 else if (k==3) *r = *q = 2;
232 else if (k==2) *r = *q = 1;
233 }
234
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
196 if (m==0) {
235 *r = *q = 0;
236 *remain = k+1;
237 }
238 } else {
239
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
216 if (m==k+1) {
240
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
20 if (k==4) {
241
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
120 for (i=0;i<5;i++) { r[i] = p1r4[i]; q[i] = p1q4[i]; }
242 } else if (k==3) {
243 for (i=0;i<4;i++) { r[i] = p1r3[i]; q[i] = p1q3[i]; }
244 } else if (k==2) {
245 for (i=0;i<3;i++) { r[i] = p1r2[i]; q[i] = p1q2[i]; }
246 } else if (k==1) {
247 for (i=0;i<2;i++) { r[i] = p1r1[i]; q[i] = p1q1[i]; }
248 }
249
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
20 PetscFunctionReturn(PETSC_SUCCESS); /* quick return */
250 }
251
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
196 if (m==k-1) {
252
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
196 if (k==5) {
253
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
780 for (i=0;i<4;i++) { r[i] = m1r5[i]; q[i] = m1q5[i]; }
254
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
468 for (i=0;i<2;i++) remain[i] = m1remain5[i];
255
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
40 } else if (k==4) {
256
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
160 for (i=0;i<3;i++) { r[i] = m1r4[i]; q[i] = m1q4[i]; }
257
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
120 for (i=0;i<2;i++) remain[i] = m1remain4[i];
258 } else if (k==3) {
259 for (i=0;i<2;i++) { r[i] = m1r3[i]; q[i] = m1q3[i]; remain[i] = m1remain3[i]; }
260 } else if (k==2) {
261 r[0] = -13.5; q[0] = 3;
262 for (i=0;i<2;i++) remain[i] = m1remain2[i];
263 }
264 }
265
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
196 if (m==0) {
266 r = q = NULL;
267 if (k==3) {
268 for (i=0;i<4;i++) remain[i] = remain3[i];
269 } else if (k==2) {
270 for (i=0;i<3;i++) remain[i] = remain2[i];
271 }
272 }
273 }
274
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
72 PetscFunctionReturn(PETSC_SUCCESS);
275 }
276
277 /*
278 * Product form coefficients.
279 * If query, the function returns the size necessary to store the coefficients.
280 */
281 432 static PetscErrorCode getcoeffsproduct(PetscInt k,PetscInt m,PetscComplex *p,PetscComplex *q,PetscComplex *mult,PetscBool query)
282 {
283 432 PetscInt i;
284 432 const PetscComplex /* m == k+1 */
285 432 p1p4[4] = {-5.203941240131764e+00 + 5.805856841805367e+00*PETSC_i,
286 432 -5.203941240131764e+00 - 5.805856841805367e+00*PETSC_i,
287 432 -6.796058759868242e+00 + 1.886649260140217e+00*PETSC_i,
288 432 -6.796058759868242e+00 - 1.886649260140217e+00*PETSC_i},
289 432 p1q4[5] = { 3.655694325463550e+00 + 6.543736899360086e+00*PETSC_i,
290 432 3.655694325463550e+00 - 6.543736899360086e+00*PETSC_i,
291 6.286704751729261e+00 ,
292 432 5.700953298671832e+00 + 3.210265600308496e+00*PETSC_i,
293 432 5.700953298671832e+00 - 3.210265600308496e+00*PETSC_i},
294 432 p1p3[3] = {-4.675757014491557e+00 + 3.913489560603711e+00*PETSC_i,
295 432 -4.675757014491557e+00 - 3.913489560603711e+00*PETSC_i,
296 -5.648485971016893e+00 },
297 432 p1q3[4] = { 3.212806896871536e+00 + 4.773087433276636e+00*PETSC_i,
298 432 3.212806896871536e+00 - 4.773087433276636e+00*PETSC_i,
299 432 4.787193103128464e+00 + 1.567476416895212e+00*PETSC_i,
300 432 4.787193103128464e+00 - 1.567476416895212e+00*PETSC_i},
301 432 p1p2[2] = {-4.00000000000000e+00 + 2.000000000000000e+00*PETSC_i,
302 432 -4.00000000000000e+00 - 2.000000000000000e+00*PETSC_i},
303 432 p1q2[3] = { 2.681082873627756e+00 + 3.050430199247411e+00*PETSC_i,
304 432 2.681082873627756e+00 - 3.050430199247411e+00*PETSC_i,
305 3.637834252744491e+00 },
306 432 p1q1[2] = { 2.000000000000000e+00 + 1.414213562373095e+00*PETSC_i,
307 432 2.000000000000000e+00 - 1.414213562373095e+00*PETSC_i};
308 432 const PetscComplex /* m == k-1 */
309 432 m1p5[5] = {-3.655694325463550e+00 + 6.543736899360086e+00*PETSC_i,
310 432 -3.655694325463550e+00 - 6.543736899360086e+00*PETSC_i,
311 -6.286704751729261e+00 ,
312 432 -5.700953298671832e+00 + 3.210265600308496e+00*PETSC_i,
313 432 -5.700953298671832e+00 - 3.210265600308496e+00*PETSC_i},
314 432 m1q5[4] = { 5.203941240131764e+00 + 5.805856841805367e+00*PETSC_i,
315 432 5.203941240131764e+00 - 5.805856841805367e+00*PETSC_i,
316 432 6.796058759868242e+00 + 1.886649260140217e+00*PETSC_i,
317 432 6.796058759868242e+00 - 1.886649260140217e+00*PETSC_i},
318 432 m1p4[4] = {-3.212806896871536e+00 + 4.773087433276636e+00*PETSC_i,
319 432 -3.212806896871536e+00 - 4.773087433276636e+00*PETSC_i,
320 432 -4.787193103128464e+00 + 1.567476416895212e+00*PETSC_i,
321 432 -4.787193103128464e+00 - 1.567476416895212e+00*PETSC_i},
322 432 m1q4[3] = { 4.675757014491557e+00 + 3.913489560603711e+00*PETSC_i,
323 432 4.675757014491557e+00 - 3.913489560603711e+00*PETSC_i,
324 5.648485971016893e+00 },
325 432 m1p3[3] = {-2.681082873627756e+00 + 3.050430199247411e+00*PETSC_i,
326 432 -2.681082873627756e+00 - 3.050430199247411e+00*PETSC_i,
327 -3.637834252744491e+00 },
328 432 m1q3[2] = { 4.000000000000000e+00 + 2.000000000000000e+00*PETSC_i,
329 432 4.000000000000000e+00 - 2.000000000000001e+00*PETSC_i},
330 432 m1p2[2] = {-2.000000000000000e+00 + 1.414213562373095e+00*PETSC_i,
331 432 -2.000000000000000e+00 - 1.414213562373095e+00*PETSC_i};
332
333
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
432 PetscFunctionBegin;
334
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
432 if (query) {
335
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
216 if (m == k+1) {
336 20 *mult = 1;
337 20 *p = k;
338 20 *q = k+1;
339
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
20 PetscFunctionReturn(PETSC_SUCCESS);
340 }
341
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
196 if (m==k-1) {
342 196 *mult = 1;
343 196 *p = k;
344 196 *q = k-1;
345 }
346 } else {
347
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
216 if (m == k+1) {
348 20 *mult = PetscPowInt(-1,m);
349 20 *mult *= m;
350
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
20 if (k==4) {
351
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
100 for (i=0;i<4;i++) { p[i] = p1p4[i]; q[i] = p1q4[i]; }
352 20 q[4] = p1q4[4];
353 } else if (k==3) {
354 for (i=0;i<3;i++) { p[i] = p1p3[i]; q[i] = p1q3[i]; }
355 q[3] = p1q3[3];
356 } else if (k==2) {
357 for (i=0;i<2;i++) { p[i] = p1p2[i]; q[i] = p1q2[i]; }
358 q[2] = p1q2[2];
359 } else if (k==1) {
360 p[0] = -3;
361 for (i=0;i<2;i++) q[i] = p1q1[i];
362 }
363
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
20 PetscFunctionReturn(PETSC_SUCCESS);
364 }
365
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
196 if (m==k-1) {
366 196 *mult = PetscPowInt(-1,m);
367 196 *mult /= k;
368
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
196 if (k==5) {
369
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
780 for (i=0;i<4;i++) { p[i] = m1p5[i]; q[i] = m1q5[i]; }
370 156 p[4] = m1p5[4];
371
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
40 } else if (k==4) {
372
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
160 for (i=0;i<3;i++) { p[i] = m1p4[i]; q[i] = m1q4[i]; }
373 40 p[3] = m1p4[3];
374 } else if (k==3) {
375 for (i=0;i<2;i++) { p[i] = m1p3[i]; q[i] = m1q3[i]; }
376 p[2] = m1p3[2];
377 } else if (k==2) {
378 for (i=0;i<2;i++) p[i] = m1p2[i];
379 q[0] = 3;
380 }
381 }
382 }
383
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
72 PetscFunctionReturn(PETSC_SUCCESS);
384 }
385 #endif /* PETSC_HAVE_COMPLEX */
386
387 #if defined(PETSC_USE_COMPLEX)
388 100 static PetscErrorCode getisreal(PetscInt n,PetscComplex *a,PetscBool *result)
389 {
390 100 PetscInt i;
391
392
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
100 PetscFunctionBegin;
393 100 *result=PETSC_TRUE;
394
3/4
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 5 times.
✗ Branch 3 not taken.
296100 for (i=0;i<n&&*result;i++) {
395
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
296000 if (PetscImaginaryPartComplex(a[i])) *result=PETSC_FALSE;
396 }
397
6/12
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 1 times.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 1 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
100 PetscFunctionReturn(PETSC_SUCCESS);
398 }
399 #endif
400
401 /*
402 * Matrix exponential implementation based on algorithm and matlab code by Stefan Guettel
403 * and Yuji Nakatsukasa
404 *
405 * Stefan Guettel and Yuji Nakatsukasa, "Scaled and Squared Subdiagonal Pade
406 * Approximation for the Matrix Exponential",
407 * SIAM J. Matrix Anal. Appl. 37(1):145-170, 2016.
408 * https://doi.org/10.1137/15M1027553
409 */
410 480 static PetscErrorCode FNEvaluateFunctionMat_Exp_GuettelNakatsukasa(FN fn,Mat A,Mat B)
411 {
412 #if !defined(PETSC_HAVE_COMPLEX)
413 PetscFunctionBegin;
414 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"This function requires C99 or C++ complex support");
415 #else
416 480 PetscInt i,j,n_,s,k,m,mod;
417 480 PetscBLASInt n=0,n2=0,irsize=0,rsizediv2,ipsize=0,iremainsize=0,info,*piv,minlen,lwork=0,one=1;
418 480 PetscReal nrm,shift=0.0;
419 #if defined(PETSC_USE_COMPLEX)
420 240 PetscReal *rwork=NULL;
421 #endif
422 480 PetscComplex *As,*RR,*RR2,*expmA,*expmA2,*Maux,*Maux2,rsize,*r,psize,*p,remainsize,*remainterm,*rootp,*rootq,mult=0.0,scale,cone=1.0,czero=0.0,*aux;
423 480 PetscScalar *Ba,*Ba2,*sMaux,*wr,*wi,expshift,sone=1.0,szero=0.0,*saux;
424 480 const PetscScalar *Aa;
425 480 PetscBool isreal,flg;
426 480 PetscBLASInt query=-1;
427 480 PetscScalar work1,*work;
428
429
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
480 PetscFunctionBegin;
430
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
480 PetscCall(MatGetSize(A,&n_,NULL));
431
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
480 PetscCall(PetscBLASIntCast(n_,&n));
432
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
480 PetscCall(MatDenseGetArrayRead(A,&Aa));
433
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
480 PetscCall(MatDenseGetArray(B,&Ba));
434 480 Ba2 = Ba;
435
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
480 PetscCall(PetscBLASIntCast(n*n,&n2));
436
437
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
480 PetscCall(PetscMalloc2(n2,&sMaux,n2,&Maux));
438 480 Maux2 = Maux;
439
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
480 PetscCall(PetscOptionsGetReal(NULL,NULL,"-fn_expm_estimated_eig",&shift,&flg));
440
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
480 if (!flg) {
441
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
480 PetscCall(PetscMalloc2(n,&wr,n,&wi));
442
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
480 PetscCall(PetscArraycpy(sMaux,Aa,n2));
443 /* estimate rightmost eigenvalue and shift A with it */
444 #if !defined(PETSC_USE_COMPLEX)
445
10/20
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
240 PetscCallBLAS("LAPACKgeev",LAPACKgeev_("N","N",&n,sMaux,&n,wr,wi,NULL,&n,NULL,&n,&work1,&query,&info));
446
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
240 SlepcCheckLapackInfo("geev",info);
447
4/6
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 1 times.
240 PetscCall(PetscBLASIntCast((PetscInt)work1,&lwork));
448
4/6
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 1 times.
240 PetscCall(PetscMalloc1(lwork,&work));
449
10/20
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
240 PetscCallBLAS("LAPACKgeev",LAPACKgeev_("N","N",&n,sMaux,&n,wr,wi,NULL,&n,NULL,&n,work,&lwork,&info));
450
5/8
✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 4 times.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
240 PetscCall(PetscFree(work));
451 #else
452
4/6
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 1 times.
240 PetscCall(PetscArraycpy(Maux,Aa,n2));
453
10/20
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
240 PetscCallBLAS("LAPACKgeev",LAPACKgeev_("N","N",&n,Maux,&n,wr,NULL,&n,NULL,&n,&work1,&query,rwork,&info));
454
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
240 SlepcCheckLapackInfo("geev",info);
455
4/6
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 1 times.
240 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(work1),&lwork));
456
4/6
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 1 times.
240 PetscCall(PetscMalloc2(2*n,&rwork,lwork,&work));
457
10/20
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
240 PetscCallBLAS("LAPACKgeev",LAPACKgeev_("N","N",&n,Maux,&n,wr,NULL,&n,NULL,&n,work,&lwork,rwork,&info));
458
4/6
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 1 times.
240 PetscCall(PetscFree2(rwork,work));
459 #endif
460
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
480 SlepcCheckLapackInfo("geev",info);
461
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
480 PetscCall(PetscLogFlops(25.0*n*n*n+(n*n*n)/3.0+1.0*n*n*n));
462
463 480 shift = PetscRealPart(wr[0]);
464
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
13600 for (i=1;i<n;i++) {
465
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
13120 if (PetscRealPart(wr[i]) > shift) shift = PetscRealPart(wr[i]);
466 }
467
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
480 PetscCall(PetscFree2(wr,wi));
468 }
469 /* shift so that largest real part is (about) 0 */
470
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
480 PetscCall(PetscArraycpy(sMaux,Aa,n2));
471
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
480 if (shift) {
472
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
14080 for (i=0;i<n;i++) sMaux[i+i*n] -= shift;
473
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
480 PetscCall(PetscLogFlops(1.0*n));
474 }
475 #if defined(PETSC_USE_COMPLEX)
476
4/6
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 1 times.
240 PetscCall(PetscArraycpy(Maux,Aa,n2));
477
1/2
✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
240 if (shift) {
478
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
7040 for (i=0;i<n;i++) Maux[i+i*n] -= shift;
479
3/6
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 1 times.
240 PetscCall(PetscLogFlops(1.0*n));
480 }
481 #endif
482
483 /* estimate norm(A) and select the scaling factor */
484 480 nrm = LAPACKlange_("O",&n,&n,sMaux,&n,NULL);
485
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
480 PetscCall(PetscLogFlops(1.0*n*n));
486
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
480 PetscCall(sexpm_params(nrm,&s,&k,&m));
487
4/6
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 10 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 10 times.
✗ Branch 5 not taken.
480 if (s==0 && k==1 && m==0) { /* exp(A) = I+A to eps! */
488
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
80 if (shift) expshift = PetscExpReal(shift);
489
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
880 for (i=0;i<n;i++) sMaux[i+i*n] += 1.0;
490
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
80 if (shift) {
491
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
80 PetscCallBLAS("BLASscal",BLASscal_(&n2,&expshift,sMaux,&one));
492
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
80 PetscCall(PetscLogFlops(1.0*(n+n2)));
493 } else PetscCall(PetscLogFlops(1.0*n));
494
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
80 PetscCall(PetscArraycpy(Ba,sMaux,n2));
495
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
80 PetscCall(PetscFree2(sMaux,Maux));
496
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
80 PetscCall(MatDenseRestoreArrayRead(A,&Aa));
497
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
80 PetscCall(MatDenseRestoreArray(B,&Ba));
498
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
16 PetscFunctionReturn(PETSC_SUCCESS); /* quick return */
499 }
500
501
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
400 PetscCall(PetscMalloc4(n2,&expmA,n2,&As,n2,&RR,n,&piv));
502 400 expmA2 = expmA; RR2 = RR;
503 /* scale matrix */
504 #if !defined(PETSC_USE_COMPLEX)
505
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
592200 for (i=0;i<n2;i++) {
506 592000 As[i] = sMaux[i];
507 }
508 #else
509
4/6
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 1 times.
200 PetscCall(PetscArraycpy(As,sMaux,n2));
510 #endif
511 400 scale = 1.0/PetscPowRealInt(2.0,s);
512
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
400 PetscCallBLAS("BLASCOMPLEXscal",BLASCOMPLEXscal_(&n2,&scale,As,&one));
513
4/6
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
400 PetscCall(SlepcLogFlopsComplex(1.0*n2));
514
515 /* evaluate Pade approximant (partial fraction or product form) */
516
3/4
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 10 times.
400 if (fn->method==3 || !m) { /* partial fraction */
517
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
200 PetscCall(getcoeffs(k,m,&rsize,&psize,&remainsize,PETSC_TRUE));
518
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
200 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPartComplex(rsize),&irsize));
519
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
200 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPartComplex(psize),&ipsize));
520
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
200 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPartComplex(remainsize),&iremainsize));
521
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
200 PetscCall(PetscMalloc3(irsize,&r,ipsize,&p,iremainsize,&remainterm));
522
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
200 PetscCall(getcoeffs(k,m,r,p,remainterm,PETSC_FALSE));
523
524
4/6
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
200 PetscCall(PetscArrayzero(expmA,n2));
525 #if !defined(PETSC_USE_COMPLEX)
526 100 isreal = PETSC_TRUE;
527 #else
528
4/6
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 1 times.
100 PetscCall(getisreal(n2,Maux,&isreal));
529 #endif
530
1/2
✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
200 if (isreal) {
531 200 rsizediv2 = irsize/2;
532
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
560 for (i=0;i<rsizediv2;i++) { /* use partial fraction to get R(As) */
533
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
360 PetscCall(PetscArraycpy(Maux,As,n2));
534
4/6
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
360 PetscCall(PetscArrayzero(RR,n2));
535
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
12760 for (j=0;j<n;j++) {
536 12400 Maux[j+j*n] -= p[2*i];
537 12400 RR[j+j*n] = r[2*i];
538 }
539
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
360 PetscCallBLAS("LAPACKCOMPLEXgesv",LAPACKCOMPLEXgesv_(&n,&n,Maux,&n,piv,RR,&n,&info));
540
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
360 SlepcCheckLapackInfo("gesv",info);
541
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
1180360 for (j=0;j<n2;j++) {
542 1180000 expmA[j] += RR[j] + PetscConj(RR[j]);
543 }
544 /* loop(n) + gesv + loop(n2) */
545
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
360 PetscCall(SlepcLogFlopsComplex(1.0*n+(2.0*n*n*n/3.0+2.0*n*n*n)+2.0*n2));
546 }
547
548 200 mod = ipsize % 2;
549
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
200 if (mod) {
550
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
60 PetscCall(PetscArraycpy(Maux,As,n2));
551
4/6
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
60 PetscCall(PetscArrayzero(RR,n2));
552
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
660 for (j=0;j<n;j++) {
553 600 Maux[j+j*n] -= p[ipsize-1];
554 600 RR[j+j*n] = r[irsize-1];
555 }
556
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
60 PetscCallBLAS("LAPACKCOMPLEXgesv",LAPACKCOMPLEXgesv_(&n,&n,Maux,&n,piv,RR,&n,&info));
557
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
60 SlepcCheckLapackInfo("gesv",info);
558
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
6060 for (j=0;j<n2;j++) {
559 6000 expmA[j] += RR[j];
560 }
561
4/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✓ Branch 5 taken 1 times.
60 PetscCall(SlepcLogFlopsComplex(1.0*n+(2.0*n*n*n/3.0+2.0*n*n*n)+1.0*n2));
562 }
563 } else { /* complex */
564 for (i=0;i<irsize;i++) { /* use partial fraction to get R(As) */
565 PetscCall(PetscArraycpy(Maux,As,n2));
566 PetscCall(PetscArrayzero(RR,n2));
567 for (j=0;j<n;j++) {
568 Maux[j+j*n] -= p[i];
569 RR[j+j*n] = r[i];
570 }
571 PetscCallBLAS("LAPACKCOMPLEXgesv",LAPACKCOMPLEXgesv_(&n,&n,Maux,&n,piv,RR,&n,&info));
572 SlepcCheckLapackInfo("gesv",info);
573 for (j=0;j<n2;j++) {
574 expmA[j] += RR[j];
575 }
576 PetscCall(SlepcLogFlopsComplex(1.0*n+(2.0*n*n*n/3.0+2.0*n*n*n)+1.0*n2));
577 }
578 }
579
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
560 for (i=0;i<iremainsize;i++) {
580
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
360 if (!i) {
581
4/6
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
180 PetscCall(PetscArrayzero(RR,n2));
582
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
6380 for (j=0;j<n;j++) {
583 6200 RR[j+j*n] = remainterm[iremainsize-1];
584 }
585 } else {
586
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
180 PetscCall(PetscArraycpy(RR,As,n2));
587
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
180 for (j=1;j<i;j++) {
588 PetscCallBLAS("BLASCOMPLEXgemm",BLASCOMPLEXgemm_("N","N",&n,&n,&n,&cone,RR,&n,RR,&n,&czero,Maux,&n));
589 SlepcSwap(RR,Maux,aux);
590 PetscCall(SlepcLogFlopsComplex(2.0*n*n*n));
591 }
592
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
180 PetscCallBLAS("BLASCOMPLEXscal",BLASCOMPLEXscal_(&n2,&remainterm[iremainsize-1-i],RR,&one));
593
4/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✓ Branch 5 taken 1 times.
180 PetscCall(SlepcLogFlopsComplex(1.0*n2));
594 }
595
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
1180360 for (j=0;j<n2;j++) {
596 1180000 expmA[j] += RR[j];
597 }
598
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
360 PetscCall(SlepcLogFlopsComplex(1.0*n2));
599 }
600
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
200 PetscCall(PetscFree3(r,p,remainterm));
601 } else { /* product form, default */
602
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
200 PetscCall(getcoeffsproduct(k,m,&rsize,&psize,&mult,PETSC_TRUE));
603
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
200 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPartComplex(rsize),&irsize));
604
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
200 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPartComplex(psize),&ipsize));
605
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
200 PetscCall(PetscMalloc2(irsize,&rootp,ipsize,&rootq));
606
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
200 PetscCall(getcoeffsproduct(k,m,rootp,rootq,&mult,PETSC_FALSE));
607
608
4/6
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
200 PetscCall(PetscArrayzero(expmA,n2));
609
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
6600 for (i=0;i<n;i++) { /* initialize */
610 6400 expmA[i+i*n] = 1.0;
611 }
612 200 minlen = PetscMin(irsize,ipsize);
613
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
960 for (i=0;i<minlen;i++) {
614
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
760 PetscCall(PetscArraycpy(RR,As,n2));
615
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
25960 for (j=0;j<n;j++) {
616 25200 RR[j+j*n] -= rootp[i];
617 }
618
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
760 PetscCallBLAS("BLASCOMPLEXgemm",BLASCOMPLEXgemm_("N","N",&n,&n,&n,&cone,RR,&n,expmA,&n,&czero,Maux,&n));
619 760 SlepcSwap(expmA,Maux,aux);
620
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
760 PetscCall(PetscArraycpy(RR,As,n2));
621
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
25960 for (j=0;j<n;j++) {
622 25200 RR[j+j*n] -= rootq[i];
623 }
624
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
760 PetscCallBLAS("LAPACKCOMPLEXgesv",LAPACKCOMPLEXgesv_(&n,&n,RR,&n,piv,expmA,&n,&info));
625
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
760 SlepcCheckLapackInfo("gesv",info);
626 /* loop(n) + gemm + loop(n) + gesv */
627
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
760 PetscCall(SlepcLogFlopsComplex(1.0*n+(2.0*n*n*n)+1.0*n+(2.0*n*n*n/3.0+2.0*n*n*n)));
628 }
629 /* extra numerator */
630
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
380 for (i=minlen;i<irsize;i++) {
631
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
180 PetscCall(PetscArraycpy(RR,As,n2));
632
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
6380 for (j=0;j<n;j++) {
633 6200 RR[j+j*n] -= rootp[i];
634 }
635
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
180 PetscCallBLAS("BLASCOMPLEXgemm",BLASCOMPLEXgemm_("N","N",&n,&n,&n,&cone,RR,&n,expmA,&n,&czero,Maux,&n));
636 180 SlepcSwap(expmA,Maux,aux);
637
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
180 PetscCall(SlepcLogFlopsComplex(1.0*n+2.0*n*n*n));
638 }
639 /* extra denominator */
640
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
220 for (i=minlen;i<ipsize;i++) {
641
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
20 PetscCall(PetscArraycpy(RR,As,n2));
642
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
220 for (j=0;j<n;j++) RR[j+j*n] -= rootq[i];
643
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
20 PetscCallBLAS("LAPACKCOMPLEXgesv",LAPACKCOMPLEXgesv_(&n,&n,RR,&n,piv,expmA,&n,&info));
644
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
20 SlepcCheckLapackInfo("gesv",info);
645
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
20 PetscCall(SlepcLogFlopsComplex(1.0*n+(2.0*n*n*n/3.0+2.0*n*n*n)));
646 }
647
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
200 PetscCallBLAS("BLASCOMPLEXscal",BLASCOMPLEXscal_(&n2,&mult,expmA,&one));
648
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
200 PetscCall(SlepcLogFlopsComplex(1.0*n2));
649
5/6
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✓ Branch 5 taken 1 times.
200 PetscCall(PetscFree2(rootp,rootq));
650 }
651
652 #if !defined(PETSC_USE_COMPLEX)
653
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
592200 for (i=0;i<n2;i++) {
654 592000 Ba2[i] = PetscRealPartComplex(expmA[i]);
655 }
656 #else
657
4/6
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 1 times.
200 PetscCall(PetscArraycpy(Ba2,expmA,n2));
658 #endif
659
660 /* perform repeated squaring */
661
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
1960 for (i=0;i<s;i++) { /* final squaring */
662
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
1560 PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&sone,Ba2,&n,Ba2,&n,&szero,sMaux,&n));
663 1560 SlepcSwap(Ba2,sMaux,saux);
664
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
1560 PetscCall(PetscLogFlops(2.0*n*n*n));
665 }
666
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
400 if (Ba2!=Ba) {
667
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
40 PetscCall(PetscArraycpy(Ba,Ba2,n2));
668 40 sMaux = Ba2;
669 }
670
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
400 if (shift) {
671 400 expshift = PetscExpReal(shift);
672
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
400 PetscCallBLAS("BLASscal",BLASscal_(&n2,&expshift,Ba,&one));
673
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
400 PetscCall(PetscLogFlops(1.0*n2));
674 }
675
676 /* restore pointers */
677 400 Maux = Maux2; expmA = expmA2; RR = RR2;
678
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
400 PetscCall(PetscFree2(sMaux,Maux));
679
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
400 PetscCall(PetscFree4(expmA,As,RR,piv));
680
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
400 PetscCall(MatDenseRestoreArrayRead(A,&Aa));
681
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
400 PetscCall(MatDenseRestoreArray(B,&Ba));
682
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
80 PetscFunctionReturn(PETSC_SUCCESS);
683 #endif
684 }
685
686 #define SMALLN 100
687
688 /*
689 * Function needed to compute optimal parameters (required workspace is 3*n*n)
690 */
691 4590 static PetscInt ell(PetscBLASInt n,PetscScalar *A,PetscReal coeff,PetscInt m,PetscScalar *work,PetscRandom rand)
692 {
693 4590 PetscScalar *Ascaled=work;
694 4590 PetscReal nrm,alpha,beta,rwork[1];
695 4590 PetscInt t;
696 4590 PetscBLASInt i,j;
697
698
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
4590 PetscFunctionBegin;
699 4590 beta = PetscPowReal(coeff,1.0/(2*m+1));
700
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
115632 for (i=0;i<n;i++)
701
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
6614592 for (j=0;j<n;j++)
702 6503550 Ascaled[i+j*n] = beta*PetscAbsScalar(A[i+j*n]);
703 4590 nrm = LAPACKlange_("O",&n,&n,A,&n,rwork);
704
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
4590 PetscCall(PetscLogFlops(2.0*n*n));
705
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
4590 PetscCall(SlepcNormAm(n,Ascaled,2*m+1,work+n*n,rand,&alpha));
706 4590 alpha /= nrm;
707
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
4590 t = PetscMax((PetscInt)PetscCeilReal(PetscLogReal(2.0*alpha/PETSC_MACHINE_EPSILON)/PetscLogReal(2.0)/(2*m)),0);
708
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
902 PetscFunctionReturn(t);
709 }
710
711 /*
712 * Compute scaling parameter (s) and order of Pade approximant (m) (required workspace is 4*n*n)
713 */
714 4588 static PetscErrorCode expm_params(PetscInt n,PetscScalar **Apowers,PetscInt *s,PetscInt *m,PetscScalar *work)
715 {
716 4588 PetscScalar sfactor,sone=1.0,szero=0.0,*A=Apowers[0],*Ascaled;
717 4588 PetscReal d4,d6,d8,d10,eta1,eta3,eta4,eta5,rwork[1];
718 4588 PetscBLASInt n_=0,n2,one=1;
719 4588 PetscRandom rand;
720 4588 const PetscReal coeff[5] = { 9.92063492063492e-06, 9.94131285136576e-11, /* backward error function */
721 2.22819456055356e-16, 1.69079293431187e-22, 8.82996160201868e-36 };
722 4588 const PetscReal theta[5] = { 1.495585217958292e-002, /* m = 3 */
723 2.539398330063230e-001, /* m = 5 */
724 9.504178996162932e-001, /* m = 7 */
725 2.097847961257068e+000, /* m = 9 */
726 5.371920351148152e+000 }; /* m = 13 */
727
728
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
4588 PetscFunctionBegin;
729 4588 *s = 0;
730 4588 *m = 13;
731
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
4588 PetscCall(PetscBLASIntCast(n,&n_));
732
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
4588 PetscCall(PetscRandomCreate(PETSC_COMM_SELF,&rand));
733 4588 d4 = PetscPowReal(LAPACKlange_("O",&n_,&n_,Apowers[2],&n_,rwork),1.0/4.0);
734
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
4588 if (d4==0.0) { /* safeguard for the case A = 0 */
735 *m = 3;
736 goto done;
737 }
738 4588 d6 = PetscPowReal(LAPACKlange_("O",&n_,&n_,Apowers[3],&n_,rwork),1.0/6.0);
739
4/6
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
4588 PetscCall(PetscLogFlops(2.0*n*n));
740
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
4588 eta1 = PetscMax(d4,d6);
741
3/4
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 10 times.
✗ Branch 3 not taken.
4588 if (eta1<=theta[0] && !ell(n_,A,coeff[0],3,work,rand)) {
742 1115 *m = 3;
743 1115 goto done;
744 }
745
3/4
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 10 times.
✗ Branch 3 not taken.
3473 if (eta1<=theta[1] && !ell(n_,A,coeff[1],5,work,rand)) {
746 1000 *m = 5;
747 1000 goto done;
748 }
749
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
2473 if (n<SMALLN) {
750
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
2333 PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,Apowers[2],&n_,Apowers[2],&n_,&szero,work,&n_));
751 2333 d8 = PetscPowReal(LAPACKlange_("O",&n_,&n_,work,&n_,rwork),1.0/8.0);
752
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
2333 PetscCall(PetscLogFlops(2.0*n*n*n+1.0*n*n));
753 } else {
754
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
140 PetscCall(SlepcNormAm(n_,Apowers[2],2,work,rand,&d8));
755 140 d8 = PetscPowReal(d8,1.0/8.0);
756 }
757
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
2473 eta3 = PetscMax(d6,d8);
758
3/4
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 10 times.
✗ Branch 3 not taken.
2473 if (eta3<=theta[2] && !ell(n_,A,coeff[2],7,work,rand)) {
759 114 *m = 7;
760 114 goto done;
761 }
762
4/4
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 2 times.
2359 if (eta3<=theta[3] && !ell(n_,A,coeff[3],9,work,rand)) {
763 177 *m = 9;
764 177 goto done;
765 }
766
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
2182 if (n<SMALLN) {
767
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
2042 PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,Apowers[2],&n_,Apowers[3],&n_,&szero,work,&n_));
768 2042 d10 = PetscPowReal(LAPACKlange_("O",&n_,&n_,work,&n_,rwork),1.0/10.0);
769
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
2042 PetscCall(PetscLogFlops(2.0*n*n*n+1.0*n*n));
770 } else {
771
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
140 PetscCall(SlepcNormAm(n_,Apowers[1],5,work,rand,&d10));
772 140 d10 = PetscPowReal(d10,1.0/10.0);
773 }
774
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
2182 eta4 = PetscMax(d8,d10);
775
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
2182 eta5 = PetscMin(eta3,eta4);
776
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
2182 *s = PetscMax((PetscInt)PetscCeilReal(PetscLogReal(eta5/theta[4])/PetscLogReal(2.0)),0);
777
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
2182 if (*s) {
778 753 Ascaled = work+3*n*n;
779 753 n2 = n_*n_;
780
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
753 PetscCallBLAS("BLAScopy",BLAScopy_(&n2,A,&one,Ascaled,&one));
781 753 sfactor = PetscPowRealInt(2.0,-(*s));
782
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
753 PetscCallBLAS("BLASscal",BLASscal_(&n2,&sfactor,Ascaled,&one));
783
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
753 PetscCall(PetscLogFlops(1.0*n*n));
784 } else Ascaled = A;
785 2182 *s += ell(n_,Ascaled,coeff[4],13,work,rand);
786 4588 done:
787
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
4588 PetscCall(PetscRandomDestroy(&rand));
788
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
902 PetscFunctionReturn(PETSC_SUCCESS);
789 }
790
791 /*
792 * Matrix exponential implementation based on algorithm and matlab code by N. Higham and co-authors
793 *
794 * N. J. Higham, "The scaling and squaring method for the matrix exponential
795 * revisited", SIAM J. Matrix Anal. Appl. 26(4):1179-1193, 2005.
796 */
797 4540 PetscErrorCode FNEvaluateFunctionMat_Exp_Higham(FN fn,Mat A,Mat B)
798 {
799 4540 PetscBLASInt n_=0,n2,*ipiv,info,one=1;
800 4540 PetscInt n,m,j,s;
801 4540 PetscScalar scale,smone=-1.0,sone=1.0,stwo=2.0,szero=0.0;
802 4540 PetscScalar *Ba,*Apowers[5],*Q,*P,*W,*work,*aux;
803 4540 const PetscScalar *Aa,*c;
804 4540 const PetscScalar c3[4] = { 120, 60, 12, 1 };
805 4540 const PetscScalar c5[6] = { 30240, 15120, 3360, 420, 30, 1 };
806 4540 const PetscScalar c7[8] = { 17297280, 8648640, 1995840, 277200, 25200, 1512, 56, 1 };
807 4540 const PetscScalar c9[10] = { 17643225600.0, 8821612800.0, 2075673600, 302702400, 30270240,
808 2162160, 110880, 3960, 90, 1 };
809 4540 const PetscScalar c13[14] = { 64764752532480000.0, 32382376266240000.0, 7771770303897600.0,
810 1187353796428800.0, 129060195264000.0, 10559470521600.0,
811 670442572800.0, 33522128640.0, 1323241920.0,
812 40840800, 960960, 16380, 182, 1 };
813
814
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
4540 PetscFunctionBegin;
815
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
4540 PetscCall(MatDenseGetArrayRead(A,&Aa));
816
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
4540 PetscCall(MatDenseGetArray(B,&Ba));
817
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
4540 PetscCall(MatGetSize(A,&n,NULL));
818
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
4540 PetscCall(PetscBLASIntCast(n,&n_));
819 4540 n2 = n_*n_;
820
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
4540 PetscCall(PetscMalloc2(8*n*n,&work,n,&ipiv));
821
822 /* Matrix powers */
823 4540 Apowers[0] = work; /* Apowers[0] = A */
824 4540 Apowers[1] = Apowers[0] + n*n; /* Apowers[1] = A^2 */
825 4540 Apowers[2] = Apowers[1] + n*n; /* Apowers[2] = A^4 */
826 4540 Apowers[3] = Apowers[2] + n*n; /* Apowers[3] = A^6 */
827 4540 Apowers[4] = Apowers[3] + n*n; /* Apowers[4] = A^8 */
828
829
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
4540 PetscCall(PetscArraycpy(Apowers[0],Aa,n2));
830
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
4540 PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,Apowers[0],&n_,Apowers[0],&n_,&szero,Apowers[1],&n_));
831
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
4540 PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,Apowers[1],&n_,Apowers[1],&n_,&szero,Apowers[2],&n_));
832
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
4540 PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,Apowers[1],&n_,Apowers[2],&n_,&szero,Apowers[3],&n_));
833
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
4540 PetscCall(PetscLogFlops(6.0*n*n*n));
834
835 /* Compute scaling parameter and order of Pade approximant */
836
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
4540 PetscCall(expm_params(n,Apowers,&s,&m,Apowers[4]));
837
838
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
4540 if (s) { /* rescale */
839
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
3925 for (j=0;j<4;j++) {
840
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
3140 scale = PetscPowRealInt(2.0,-PetscMax(2*j,1)*s);
841
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
3140 PetscCallBLAS("BLASscal",BLASscal_(&n2,&scale,Apowers[j],&one));
842 }
843
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
785 PetscCall(PetscLogFlops(4.0*n*n));
844 }
845
846 /* Evaluate the Pade approximant */
847
5/6
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
✗ Branch 4 not taken.
✓ Branch 5 taken 10 times.
4540 switch (m) {
848 case 3: c = c3; break;
849 1000 case 5: c = c5; break;
850 98 case 7: c = c7; break;
851 177 case 9: c = c9; break;
852 2150 case 13: c = c13; break;
853 default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong value of m %" PetscInt_FMT,m);
854 }
855 4540 P = Ba;
856 4540 Q = Apowers[4] + n*n;
857 4540 W = Q + n*n;
858
2/3
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
4540 switch (m) {
859 2390 case 3:
860 case 5:
861 case 7:
862 case 9:
863
12/22
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✓ Branch 9 taken 2 times.
✓ Branch 10 taken 2 times.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✓ Branch 13 taken 2 times.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✓ Branch 16 taken 2 times.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✓ Branch 19 taken 2 times.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
2390 if (m==9) PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,Apowers[1],&n_,Apowers[3],&n_,&szero,Apowers[4],&n_));
864
4/6
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
2390 PetscCall(PetscArrayzero(P,n2));
865
4/6
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
2390 PetscCall(PetscArrayzero(Q,n2));
866
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
25329 for (j=0;j<n;j++) {
867 22939 P[j+j*n] = c[1];
868 22939 Q[j+j*n] = c[0];
869 }
870
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
6507 for (j=m;j>=3;j-=2) {
871
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
4117 PetscCallBLAS("BLASaxpy",BLASaxpy_(&n2,&c[j],Apowers[(j+1)/2-1],&one,P,&one));
872
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
4117 PetscCallBLAS("BLASaxpy",BLASaxpy_(&n2,&c[j-1],Apowers[(j+1)/2-1],&one,Q,&one));
873
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
4117 PetscCall(PetscLogFlops(4.0*n*n));
874 }
875
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
2390 PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,Apowers[0],&n_,P,&n_,&szero,W,&n_));
876
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
2390 PetscCall(PetscLogFlops(2.0*n*n*n));
877 2820 SlepcSwap(P,W,aux);
878 1918 break;
879 2150 case 13:
880 /* P = A*(Apowers[3]*(c[13]*Apowers[3] + c[11]*Apowers[2] + c[9]*Apowers[1])
881 + c[7]*Apowers[3] + c[5]*Apowers[2] + c[3]*Apowers[1] + c[1]*I) */
882
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
2150 PetscCallBLAS("BLAScopy",BLAScopy_(&n2,Apowers[3],&one,P,&one));
883
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
2150 PetscCallBLAS("BLASscal",BLASscal_(&n2,&c[13],P,&one));
884
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
2150 PetscCallBLAS("BLASaxpy",BLASaxpy_(&n2,&c[11],Apowers[2],&one,P,&one));
885
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
2150 PetscCallBLAS("BLASaxpy",BLASaxpy_(&n2,&c[9],Apowers[1],&one,P,&one));
886
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
2150 PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,Apowers[3],&n_,P,&n_,&szero,W,&n_));
887
3/6
✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
2150 PetscCall(PetscLogFlops(5.0*n*n+2.0*n*n*n));
888
4/6
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
2150 PetscCall(PetscArrayzero(P,n2));
889
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
89713 for (j=0;j<n;j++) P[j+j*n] = c[1];
890
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
2150 PetscCallBLAS("BLASaxpy",BLASaxpy_(&n2,&c[7],Apowers[3],&one,P,&one));
891
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
2150 PetscCallBLAS("BLASaxpy",BLASaxpy_(&n2,&c[5],Apowers[2],&one,P,&one));
892
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
2150 PetscCallBLAS("BLASaxpy",BLASaxpy_(&n2,&c[3],Apowers[1],&one,P,&one));
893
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
2150 PetscCallBLAS("BLASaxpy",BLASaxpy_(&n2,&sone,P,&one,W,&one));
894
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
2150 PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,Apowers[0],&n_,W,&n_,&szero,P,&n_));
895
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
2150 PetscCall(PetscLogFlops(7.0*n*n+2.0*n*n*n));
896 /* Q = Apowers[3]*(c[12]*Apowers[3] + c[10]*Apowers[2] + c[8]*Apowers[1])
897 + c[6]*Apowers[3] + c[4]*Apowers[2] + c[2]*Apowers[1] + c[0]*I */
898
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
2150 PetscCallBLAS("BLAScopy",BLAScopy_(&n2,Apowers[3],&one,Q,&one));
899
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
2150 PetscCallBLAS("BLASscal",BLASscal_(&n2,&c[12],Q,&one));
900
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
2150 PetscCallBLAS("BLASaxpy",BLASaxpy_(&n2,&c[10],Apowers[2],&one,Q,&one));
901
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
2150 PetscCallBLAS("BLASaxpy",BLASaxpy_(&n2,&c[8],Apowers[1],&one,Q,&one));
902
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
2150 PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,Apowers[3],&n_,Q,&n_,&szero,W,&n_));
903
3/6
✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
2150 PetscCall(PetscLogFlops(5.0*n*n+2.0*n*n*n));
904
4/6
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
2150 PetscCall(PetscArrayzero(Q,n2));
905
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
89713 for (j=0;j<n;j++) Q[j+j*n] = c[0];
906
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
2150 PetscCallBLAS("BLASaxpy",BLASaxpy_(&n2,&c[6],Apowers[3],&one,Q,&one));
907
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
2150 PetscCallBLAS("BLASaxpy",BLASaxpy_(&n2,&c[4],Apowers[2],&one,Q,&one));
908
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
2150 PetscCallBLAS("BLASaxpy",BLASaxpy_(&n2,&c[2],Apowers[1],&one,Q,&one));
909
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
2150 PetscCallBLAS("BLASaxpy",BLASaxpy_(&n2,&sone,W,&one,Q,&one));
910
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
2150 PetscCall(PetscLogFlops(7.0*n*n));
911 break;
912 default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong value of m %" PetscInt_FMT,m);
913 }
914
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
4540 PetscCallBLAS("BLASaxpy",BLASaxpy_(&n2,&smone,P,&one,Q,&one));
915
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
4540 PetscCallBLAS("LAPACKgesv",LAPACKgesv_(&n_,&n_,Q,&n_,ipiv,P,&n_,&info));
916
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
4540 SlepcCheckLapackInfo("gesv",info);
917
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
4540 PetscCallBLAS("BLASscal",BLASscal_(&n2,&stwo,P,&one));
918
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
115042 for (j=0;j<n;j++) P[j+j*n] += 1.0;
919
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
4540 PetscCall(PetscLogFlops(2.0*n*n*n/3.0+4.0*n*n));
920
921 /* Squaring */
922
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
11160 for (j=1;j<=s;j++) {
923
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
6620 PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,P,&n_,P,&n_,&szero,W,&n_));
924 6620 SlepcSwap(P,W,aux);
925 }
926
6/8
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
4540 if (P!=Ba) PetscCall(PetscArraycpy(Ba,P,n2));
927
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
4540 PetscCall(PetscLogFlops(2.0*n*n*n*s));
928
929
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
4540 PetscCall(PetscFree2(work,ipiv));
930
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
4540 PetscCall(MatDenseRestoreArrayRead(A,&Aa));
931
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
4540 PetscCall(MatDenseRestoreArray(B,&Ba));
932
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
902 PetscFunctionReturn(PETSC_SUCCESS);
933 }
934
935 #if defined(PETSC_HAVE_CUDA)
936 #include "../src/sys/classes/fn/impls/cuda/fnutilcuda.h"
937 #include <slepccupmblas.h>
938
939 PetscErrorCode FNEvaluateFunctionMat_Exp_Pade_CUDA(FN fn,Mat A,Mat B)
940 {
941 PetscBLASInt n=0,ld,ld2,*d_ipiv,*d_info,info,one=1;
942 PetscInt m,k,sexp;
943 PetscBool odd;
944 const PetscInt p=MAX_PADE;
945 PetscReal c[MAX_PADE+1],s;
946 PetscScalar scale,smone=-1.0,sone=1.0,stwo=2.0,szero=0.0;
947 const PetscScalar *Aa;
948 PetscScalar *d_Ba,*d_As,*d_A2,*d_Q,*d_P,*d_W,*aux,**ppP,**d_ppP,**ppQ,**d_ppQ;
949 cublasHandle_t cublasv2handle;
950
951 PetscFunctionBegin;
952 PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA)); /* For CUDA event timers */
953 PetscCall(PetscCUBLASGetHandle(&cublasv2handle));
954 PetscCall(MatGetSize(A,&m,NULL));
955 PetscCall(PetscBLASIntCast(m,&n));
956 ld = n;
957 ld2 = ld*ld;
958 if (A==B) {
959 PetscCallCUDA(cudaMalloc((void **)&d_As,sizeof(PetscScalar)*m*m));
960 PetscCall(MatDenseCUDAGetArrayRead(A,&Aa));
961 PetscCallCUDA(cudaMemcpy(d_As,Aa,sizeof(PetscScalar)*ld2,cudaMemcpyDeviceToDevice));
962 PetscCall(MatDenseCUDARestoreArrayRead(A,&Aa));
963 } else PetscCall(MatDenseCUDAGetArrayRead(A,(const PetscScalar**)&d_As));
964 PetscCall(MatDenseCUDAGetArrayWrite(B,&d_Ba));
965
966 PetscCallCUDA(cudaMalloc((void **)&d_Q,sizeof(PetscScalar)*m*m));
967 PetscCallCUDA(cudaMalloc((void **)&d_W,sizeof(PetscScalar)*m*m));
968 PetscCallCUDA(cudaMalloc((void **)&d_A2,sizeof(PetscScalar)*m*m));
969 PetscCallCUDA(cudaMalloc((void **)&d_ipiv,sizeof(PetscBLASInt)*ld));
970 PetscCallCUDA(cudaMalloc((void **)&d_info,sizeof(PetscBLASInt)));
971 PetscCallCUDA(cudaMalloc((void **)&d_ppP,sizeof(PetscScalar*)));
972 PetscCallCUDA(cudaMalloc((void **)&d_ppQ,sizeof(PetscScalar*)));
973
974 PetscCall(PetscMalloc1(1,&ppP));
975 PetscCall(PetscMalloc1(1,&ppQ));
976
977 d_P = d_Ba;
978 PetscCall(PetscLogGpuTimeBegin());
979
980 /* Pade' coefficients */
981 c[0] = 1.0;
982 for (k=1;k<=p;k++) c[k] = c[k-1]*(p+1-k)/(k*(2*p+1-k));
983
984 /* Scaling */
985 PetscCallCUBLAS(cublasXnrm2(cublasv2handle,ld2,d_As,one,&s));
986 if (s>0.5) {
987 sexp = PetscMax(0,(int)(PetscLogReal(s)/PetscLogReal(2.0))+2);
988 scale = PetscPowRealInt(2.0,-sexp);
989 PetscCallCUBLAS(cublasXscal(cublasv2handle,ld2,&scale,d_As,one));
990 PetscCall(PetscLogGpuFlops(1.0*n*n));
991 } else sexp = 0;
992
993 /* Horner evaluation */
994 PetscCallCUBLAS(cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n,n,n,&sone,d_As,ld,d_As,ld,&szero,d_A2,ld));
995 PetscCall(PetscLogGpuFlops(2.0*n*n*n));
996 PetscCallCUDA(cudaMemset(d_Q,0,sizeof(PetscScalar)*ld2));
997 PetscCallCUDA(cudaMemset(d_P,0,sizeof(PetscScalar)*ld2));
998 PetscCall(set_diagonal(n,d_Q,ld,c[p]));
999 PetscCall(set_diagonal(n,d_P,ld,c[p-1]));
1000
1001 odd = PETSC_TRUE;
1002 for (k=p-1;k>0;k--) {
1003 if (odd) {
1004 PetscCallCUBLAS(cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n,n,n,&sone,d_Q,ld,d_A2,ld,&szero,d_W,ld));
1005 SlepcSwap(d_Q,d_W,aux);
1006 PetscCall(shift_diagonal(n,d_Q,ld,c[k-1]));
1007 odd = PETSC_FALSE;
1008 } else {
1009 PetscCallCUBLAS(cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n,n,n,&sone,d_P,ld,d_A2,ld,&szero,d_W,ld));
1010 SlepcSwap(d_P,d_W,aux);
1011 PetscCall(shift_diagonal(n,d_P,ld,c[k-1]));
1012 odd = PETSC_TRUE;
1013 }
1014 PetscCall(PetscLogGpuFlops(2.0*n*n*n));
1015 }
1016 if (odd) {
1017 PetscCallCUBLAS(cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n,n,n,&sone,d_Q,ld,d_As,ld,&szero,d_W,ld));
1018 SlepcSwap(d_Q,d_W,aux);
1019 PetscCallCUBLAS(cublasXaxpy(cublasv2handle,ld2,&smone,d_P,one,d_Q,one));
1020
1021 ppQ[0] = d_Q;
1022 ppP[0] = d_P;
1023 PetscCallCUDA(cudaMemcpy(d_ppQ,ppQ,sizeof(PetscScalar*),cudaMemcpyHostToDevice));
1024 PetscCallCUDA(cudaMemcpy(d_ppP,ppP,sizeof(PetscScalar*),cudaMemcpyHostToDevice));
1025
1026 PetscCallCUBLAS(cublasXgetrfBatched(cublasv2handle,n,d_ppQ,ld,d_ipiv,d_info,one));
1027 PetscCallCUDA(cudaMemcpy(&info,d_info,sizeof(PetscBLASInt),cudaMemcpyDeviceToHost));
1028 PetscCheck(info>=0,PETSC_COMM_SELF,PETSC_ERR_LIB,"LAPACKgetrf: Illegal value on argument %" PetscBLASInt_FMT,PetscAbsInt(info));
1029 PetscCheck(info<=0,PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"LAPACKgetrf: Matrix is singular. U(%" PetscBLASInt_FMT ",%" PetscBLASInt_FMT ") is zero",info,info);
1030 PetscCallCUBLAS(cublasXgetrsBatched(cublasv2handle,CUBLAS_OP_N,n,n,(const PetscScalar **)d_ppQ,ld,d_ipiv,d_ppP,ld,&info,one));
1031 PetscCheck(info>=0,PETSC_COMM_SELF,PETSC_ERR_LIB,"LAPACKgetri: Illegal value on argument %" PetscBLASInt_FMT,PetscAbsInt(info));
1032 PetscCheck(info<=0,PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"LAPACKgetri: Matrix is singular. U(%" PetscBLASInt_FMT ",%" PetscBLASInt_FMT ") is zero",info,info);
1033 PetscCallCUBLAS(cublasXscal(cublasv2handle,ld2,&stwo,d_P,one));
1034 PetscCall(shift_diagonal(n,d_P,ld,sone));
1035 PetscCallCUBLAS(cublasXscal(cublasv2handle,ld2,&smone,d_P,one));
1036 } else {
1037 PetscCallCUBLAS(cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n,n,n,&sone,d_P,ld,d_As,ld,&szero,d_W,ld));
1038 SlepcSwap(d_P,d_W,aux);
1039 PetscCallCUBLAS(cublasXaxpy(cublasv2handle,ld2,&smone,d_P,one,d_Q,one));
1040
1041 ppQ[0] = d_Q;
1042 ppP[0] = d_P;
1043 PetscCallCUDA(cudaMemcpy(d_ppQ,ppQ,sizeof(PetscScalar*),cudaMemcpyHostToDevice));
1044 PetscCallCUDA(cudaMemcpy(d_ppP,ppP,sizeof(PetscScalar*),cudaMemcpyHostToDevice));
1045
1046 PetscCallCUBLAS(cublasXgetrfBatched(cublasv2handle,n,d_ppQ,ld,d_ipiv,d_info,one));
1047 PetscCallCUDA(cudaMemcpy(&info,d_info,sizeof(PetscBLASInt),cudaMemcpyDeviceToHost));
1048 PetscCheck(info>=0,PETSC_COMM_SELF,PETSC_ERR_LIB,"LAPACKgetrf: Illegal value on argument %" PetscBLASInt_FMT,PetscAbsInt(info));
1049 PetscCheck(info<=0,PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"LAPACKgetrf: Matrix is singular. U(%" PetscBLASInt_FMT ",%" PetscBLASInt_FMT ") is zero",info,info);
1050 PetscCallCUBLAS(cublasXgetrsBatched(cublasv2handle,CUBLAS_OP_N,n,n,(const PetscScalar **)d_ppQ,ld,d_ipiv,d_ppP,ld,&info,one));
1051 PetscCheck(info>=0,PETSC_COMM_SELF,PETSC_ERR_LIB,"LAPACKgetri: Illegal value on argument %" PetscBLASInt_FMT,PetscAbsInt(info));
1052 PetscCheck(info<=0,PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"LAPACKgetri: Matrix is singular. U(%" PetscBLASInt_FMT ",%" PetscBLASInt_FMT ") is zero",info,info);
1053 PetscCallCUBLAS(cublasXscal(cublasv2handle,ld2,&stwo,d_P,one));
1054 PetscCall(shift_diagonal(n,d_P,ld,sone));
1055 }
1056 PetscCall(PetscLogGpuFlops(2.0*n*n*n+2.0*n*n*n/3.0+4.0*n*n));
1057
1058 for (k=1;k<=sexp;k++) {
1059 PetscCallCUBLAS(cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n,n,n,&sone,d_P,ld,d_P,ld,&szero,d_W,ld));
1060 PetscCallCUDA(cudaMemcpy(d_P,d_W,sizeof(PetscScalar)*ld2,cudaMemcpyDeviceToDevice));
1061 }
1062 PetscCall(PetscLogGpuFlops(2.0*n*n*n*sexp));
1063
1064 PetscCall(PetscLogGpuTimeEnd());
1065 PetscCallCUDA(cudaFree(d_Q));
1066 PetscCallCUDA(cudaFree(d_W));
1067 PetscCallCUDA(cudaFree(d_A2));
1068 PetscCallCUDA(cudaFree(d_ipiv));
1069 PetscCallCUDA(cudaFree(d_info));
1070 PetscCallCUDA(cudaFree(d_ppP));
1071 PetscCallCUDA(cudaFree(d_ppQ));
1072
1073 PetscCall(PetscFree(ppP));
1074 PetscCall(PetscFree(ppQ));
1075
1076 if (d_P!=d_Ba) PetscCallCUDA(cudaMemcpy(d_Ba,d_P,sizeof(PetscScalar)*ld2,cudaMemcpyDeviceToDevice));
1077 if (A!=B) {
1078 if (s>0.5) { /* undo scaling */
1079 scale = 1.0/scale;
1080 PetscCallCUBLAS(cublasXscal(cublasv2handle,ld2,&scale,d_As,one));
1081 }
1082 PetscCall(MatDenseCUDARestoreArrayRead(A,(const PetscScalar**)&d_As));
1083 } else PetscCallCUDA(cudaFree(d_As));
1084 PetscCall(MatDenseCUDARestoreArrayWrite(B,&d_Ba));
1085 PetscFunctionReturn(PETSC_SUCCESS);
1086 }
1087
1088 #if defined(PETSC_HAVE_MAGMA)
1089 #include <slepcmagma.h>
1090
1091 16 PetscErrorCode FNEvaluateFunctionMat_Exp_Pade_CUDAm(FN fn,Mat A,Mat B)
1092 {
1093 16 PetscBLASInt n=0,ld,ld2,*piv,one=1;
1094 16 PetscInt m,k,sexp;
1095 16 PetscBool odd;
1096 16 const PetscInt p=MAX_PADE;
1097 16 PetscReal c[MAX_PADE+1],s;
1098 16 PetscScalar scale,smone=-1.0,sone=1.0,stwo=2.0,szero=0.0;
1099 16 const PetscScalar *Aa;
1100 16 PetscScalar *d_Ba,*d_As,*d_A2,*d_Q,*d_P,*d_W,*aux;
1101 16 cublasHandle_t cublasv2handle;
1102
1103 16 PetscFunctionBegin;
1104
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA)); /* For CUDA event timers */
1105
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 PetscCall(PetscCUBLASGetHandle(&cublasv2handle));
1106
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 PetscCall(SlepcMagmaInit());
1107
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 PetscCall(MatGetSize(A,&m,NULL));
1108
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 PetscCall(PetscBLASIntCast(m,&n));
1109 16 ld = n;
1110 16 ld2 = ld*ld;
1111
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
16 if (A==B) {
1112
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
4 PetscCallCUDA(cudaMalloc((void **)&d_As,sizeof(PetscScalar)*m*m));
1113
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
4 PetscCall(MatDenseCUDAGetArrayRead(A,&Aa));
1114
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
4 PetscCallCUDA(cudaMemcpy(d_As,Aa,sizeof(PetscScalar)*ld2,cudaMemcpyDeviceToDevice));
1115
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
4 PetscCall(MatDenseCUDARestoreArrayRead(A,&Aa));
1116
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
12 } else PetscCall(MatDenseCUDAGetArrayRead(A,(const PetscScalar**)&d_As));
1117
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 PetscCall(MatDenseCUDAGetArrayWrite(B,&d_Ba));
1118
1119
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
16 PetscCallCUDA(cudaMalloc((void **)&d_Q,sizeof(PetscScalar)*m*m));
1120
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
16 PetscCallCUDA(cudaMalloc((void **)&d_W,sizeof(PetscScalar)*m*m));
1121
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
16 PetscCallCUDA(cudaMalloc((void **)&d_A2,sizeof(PetscScalar)*m*m));
1122
1123
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 PetscCall(PetscMalloc1(n,&piv));
1124
1125 16 d_P = d_Ba;
1126
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 PetscCall(PetscLogGpuTimeBegin());
1127
1128 /* Pade' coefficients */
1129 16 c[0] = 1.0;
1130
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
112 for (k=1;k<=p;k++) c[k] = c[k-1]*(p+1-k)/(k*(2*p+1-k));
1131
1132 /* Scaling */
1133
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
16 PetscCallCUBLAS(cublasXnrm2(cublasv2handle,ld2,d_As,one,&s));
1134
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 PetscCall(PetscLogGpuFlops(1.0*n*n));
1135
1136
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
16 if (s>0.5) {
1137
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
16 sexp = PetscMax(0,(int)(PetscLogReal(s)/PetscLogReal(2.0))+2);
1138 16 scale = PetscPowRealInt(2.0,-sexp);
1139
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
16 PetscCallCUBLAS(cublasXscal(cublasv2handle,ld2,&scale,d_As,one));
1140
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 PetscCall(PetscLogGpuFlops(1.0*n*n));
1141 } else sexp = 0;
1142
1143 /* Horner evaluation */
1144
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
16 PetscCallCUBLAS(cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n,n,n,&sone,d_As,ld,d_As,ld,&szero,d_A2,ld));
1145
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 PetscCall(PetscLogGpuFlops(2.0*n*n*n));
1146
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
16 PetscCallCUDA(cudaMemset(d_Q,0,sizeof(PetscScalar)*ld2));
1147
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
16 PetscCallCUDA(cudaMemset(d_P,0,sizeof(PetscScalar)*ld2));
1148
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 PetscCall(set_diagonal(n,d_Q,ld,c[p]));
1149
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 PetscCall(set_diagonal(n,d_P,ld,c[p-1]));
1150
1151 odd = PETSC_TRUE;
1152
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
96 for (k=p-1;k>0;k--) {
1153
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
80 if (odd) {
1154
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
48 PetscCallCUBLAS(cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n,n,n,&sone,d_Q,ld,d_A2,ld,&szero,d_W,ld));
1155 48 SlepcSwap(d_Q,d_W,aux);
1156
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
48 PetscCall(shift_diagonal(n,d_Q,ld,c[k-1]));
1157 odd = PETSC_FALSE;
1158 } else {
1159
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
32 PetscCallCUBLAS(cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n,n,n,&sone,d_P,ld,d_A2,ld,&szero,d_W,ld));
1160 32 SlepcSwap(d_P,d_W,aux);
1161
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
32 PetscCall(shift_diagonal(n,d_P,ld,c[k-1]));
1162 odd = PETSC_TRUE;
1163 }
1164
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
80 PetscCall(PetscLogGpuFlops(2.0*n*n*n));
1165 }
1166
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 if (odd) {
1167 PetscCallCUBLAS(cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n,n,n,&sone,d_Q,ld,d_As,ld,&szero,d_W,ld));
1168 SlepcSwap(d_Q,d_W,aux);
1169 PetscCallCUBLAS(cublasXaxpy(cublasv2handle,ld2,&smone,d_P,one,d_Q,one));
1170 PetscCallMAGMA(magma_xgesv_gpu,n,n,d_Q,ld,piv,d_P,ld);
1171 PetscCallCUBLAS(cublasXscal(cublasv2handle,ld2,&stwo,d_P,one));
1172 PetscCall(shift_diagonal(n,d_P,ld,sone));
1173 PetscCallCUBLAS(cublasXscal(cublasv2handle,ld2,&smone,d_P,one));
1174 } else {
1175
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
16 PetscCallCUBLAS(cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n,n,n,&sone,d_P,ld,d_As,ld,&szero,d_W,ld));
1176 16 SlepcSwap(d_P,d_W,aux);
1177
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
16 PetscCallCUBLAS(cublasXaxpy(cublasv2handle,ld2,&smone,d_P,one,d_Q,one));
1178
2/6
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
16 PetscCallMAGMA(magma_xgesv_gpu,n,n,d_Q,ld,piv,d_P,ld);
1179
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
16 PetscCallCUBLAS(cublasXscal(cublasv2handle,ld2,&stwo,d_P,one));
1180
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 PetscCall(shift_diagonal(n,d_P,ld,sone));
1181 }
1182
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 PetscCall(PetscLogGpuFlops(2.0*n*n*n+2.0*n*n*n/3.0+4.0*n*n));
1183
1184
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
96 for (k=1;k<=sexp;k++) {
1185
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
80 PetscCallCUBLAS(cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n,n,n,&sone,d_P,ld,d_P,ld,&szero,d_W,ld));
1186
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
80 PetscCallCUDA(cudaMemcpy(d_P,d_W,sizeof(PetscScalar)*ld2,cudaMemcpyDeviceToDevice));
1187 }
1188
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 PetscCall(PetscLogGpuFlops(2.0*n*n*n*sexp));
1189
1190
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 PetscCall(PetscLogGpuTimeEnd());
1191
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
16 PetscCallCUDA(cudaFree(d_Q));
1192
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
16 PetscCallCUDA(cudaFree(d_W));
1193
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
16 PetscCallCUDA(cudaFree(d_A2));
1194
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
16 PetscCall(PetscFree(piv));
1195
1196
1/6
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
16 if (d_P!=d_Ba) PetscCallCUDA(cudaMemcpy(d_Ba,d_P,sizeof(PetscScalar)*ld2,cudaMemcpyDeviceToDevice));
1197
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
16 if (A!=B) {
1198
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
12 if (s>0.5) { /* undo scaling */
1199 12 scale = 1.0/scale;
1200
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
12 PetscCallCUBLAS(cublasXscal(cublasv2handle,ld2,&scale,d_As,one));
1201 }
1202
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
12 PetscCall(MatDenseCUDARestoreArrayRead(A,(const PetscScalar**)&d_As));
1203
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
4 } else PetscCallCUDA(cudaFree(d_As));
1204
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 PetscCall(MatDenseCUDARestoreArrayWrite(B,&d_Ba));
1205 PetscFunctionReturn(PETSC_SUCCESS);
1206 }
1207
1208 /*
1209 * Matrix exponential implementation based on algorithm and matlab code by N. Higham and co-authors
1210 *
1211 * N. J. Higham, "The scaling and squaring method for the matrix exponential
1212 * revisited", SIAM J. Matrix Anal. Appl. 26(4):1179-1193, 2005.
1213 */
1214 48 PetscErrorCode FNEvaluateFunctionMat_Exp_Higham_CUDAm(FN fn,Mat A,Mat B)
1215 {
1216 48 PetscBLASInt n_=0,n2,*ipiv,one=1;
1217 48 PetscInt n,m,j,s;
1218 48 PetscScalar scale,smone=-1.0,sone=1.0,stwo=2.0,szero=0.0;
1219 48 PetscScalar *d_Ba,*Apowers[5],*d_Apowers[5],*d_Q,*d_P,*d_W,*work,*d_work,*aux;
1220 48 const PetscScalar *Aa,*c;
1221 48 const PetscScalar c3[4] = { 120, 60, 12, 1 };
1222 48 const PetscScalar c5[6] = { 30240, 15120, 3360, 420, 30, 1 };
1223 48 const PetscScalar c7[8] = { 17297280, 8648640, 1995840, 277200, 25200, 1512, 56, 1 };
1224 48 const PetscScalar c9[10] = { 17643225600, 8821612800, 2075673600, 302702400, 30270240,
1225 2162160, 110880, 3960, 90, 1 };
1226 48 const PetscScalar c13[14] = { 64764752532480000, 32382376266240000, 7771770303897600,
1227 1187353796428800, 129060195264000, 10559470521600,
1228 670442572800, 33522128640, 1323241920,
1229 40840800, 960960, 16380, 182, 1 };
1230 48 cublasHandle_t cublasv2handle;
1231
1232 48 PetscFunctionBegin;
1233
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
48 PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA)); /* For CUDA event timers */
1234
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
48 PetscCall(PetscCUBLASGetHandle(&cublasv2handle));
1235
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
48 PetscCall(SlepcMagmaInit());
1236
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
48 PetscCall(MatGetSize(A,&n,NULL));
1237
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
48 PetscCall(PetscBLASIntCast(n,&n_));
1238 48 n2 = n_*n_;
1239
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
48 PetscCall(PetscMalloc2(8*n*n,&work,n,&ipiv));
1240 /* Matrix powers */
1241 48 Apowers[0] = work; /* Apowers[0] = A */
1242 48 Apowers[1] = Apowers[0] + n*n; /* Apowers[1] = A^2 */
1243 48 Apowers[2] = Apowers[1] + n*n; /* Apowers[2] = A^4 */
1244 48 Apowers[3] = Apowers[2] + n*n; /* Apowers[3] = A^6 */
1245 48 Apowers[4] = Apowers[3] + n*n; /* Apowers[4] = A^8 */
1246
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
48 if (A==B) {
1247
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
4 PetscCallCUDA(cudaMalloc((void**)&d_work,7*n*n*sizeof(PetscScalar)));
1248 4 d_Apowers[0] = d_work; /* d_Apowers[0] = A */
1249 4 d_Apowers[1] = d_Apowers[0] + n*n; /* d_Apowers[1] = A^2 */
1250
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
4 PetscCall(MatDenseCUDAGetArrayRead(A,&Aa));
1251
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
4 PetscCallCUDA(cudaMemcpy(d_Apowers[0],Aa,n2*sizeof(PetscScalar),cudaMemcpyDeviceToDevice));
1252
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
4 PetscCall(MatDenseCUDARestoreArrayRead(A,&Aa));
1253 } else {
1254
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
44 PetscCallCUDA(cudaMalloc((void**)&d_work,6*n*n*sizeof(PetscScalar)));
1255
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
44 PetscCall(MatDenseCUDAGetArrayRead(A,(const PetscScalar**)&d_Apowers[0]));
1256 44 d_Apowers[1] = d_work; /* d_Apowers[1] = A^2 */
1257 }
1258
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
48 PetscCall(MatDenseCUDAGetArrayWrite(B,&d_Ba));
1259 48 d_Apowers[2] = d_Apowers[1] + n*n; /* d_Apowers[2] = A^4 */
1260 48 d_Apowers[3] = d_Apowers[2] + n*n; /* d_Apowers[3] = A^6 */
1261 48 d_Apowers[4] = d_Apowers[3] + n*n; /* d_Apowers[4] = A^8 */
1262 48 d_Q = d_Apowers[4] + n*n;
1263 48 d_W = d_Q + n*n;
1264
1265
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
48 PetscCall(PetscLogGpuTimeBegin());
1266
1267
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
48 PetscCallCUBLAS(cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n_,n_,n_,&sone,d_Apowers[0],n_,d_Apowers[0],n_,&szero,d_Apowers[1],n_));
1268
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
48 PetscCallCUBLAS(cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n_,n_,n_,&sone,d_Apowers[1],n_,d_Apowers[1],n_,&szero,d_Apowers[2],n_));
1269
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
48 PetscCallCUBLAS(cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n_,n_,n_,&sone,d_Apowers[1],n_,d_Apowers[2],n_,&szero,d_Apowers[3],n_));
1270
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
48 PetscCall(PetscLogGpuFlops(6.0*n*n*n));
1271
1272
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
48 PetscCallCUDA(cudaMemcpy(Apowers[0],d_Apowers[0],n2*sizeof(PetscScalar),cudaMemcpyDeviceToHost));
1273
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
48 PetscCallCUDA(cudaMemcpy(Apowers[1],d_Apowers[1],3*n2*sizeof(PetscScalar),cudaMemcpyDeviceToHost));
1274
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
48 PetscCall(PetscLogGpuToCpu(4*n2*sizeof(PetscScalar)));
1275 /* Compute scaling parameter and order of Pade approximant */
1276
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
48 PetscCall(expm_params(n,Apowers,&s,&m,Apowers[4]));
1277
1278
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
48 if (s) { /* rescale */
1279
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
80 for (j=0;j<4;j++) {
1280
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
64 scale = PetscPowRealInt(2.0,-PetscMax(2*j,1)*s);
1281
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
64 PetscCallCUBLAS(cublasXscal(cublasv2handle,n2,&scale,d_Apowers[j],one));
1282 }
1283
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 PetscCall(PetscLogGpuFlops(4.0*n*n));
1284 }
1285
1286 /* Evaluate the Pade approximant */
1287
2/6
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
48 switch (m) {
1288 case 3: c = c3; break;
1289 case 5: c = c5; break;
1290 16 case 7: c = c7; break;
1291 case 9: c = c9; break;
1292 32 case 13: c = c13; break;
1293 default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong value of m %" PetscInt_FMT,m);
1294 }
1295 48 d_P = d_Ba;
1296
2/3
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
48 switch (m) {
1297 16 case 3:
1298 case 5:
1299 case 7:
1300 case 9:
1301
1/12
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
16 if (m==9) PetscCallCUBLAS(cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n_,n_,n_,&sone,d_Apowers[1],n_,d_Apowers[3],n_,&szero,d_Apowers[4],n_));
1302
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
16 PetscCallCUDA(cudaMemset(d_P,0,sizeof(PetscScalar)*n2));
1303
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
16 PetscCallCUDA(cudaMemset(d_Q,0,sizeof(PetscScalar)*n2));
1304
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 PetscCall(set_diagonal(n,d_P,n,c[1]));
1305
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 PetscCall(set_diagonal(n,d_Q,n,c[0]));
1306
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
64 for (j=m;j>=3;j-=2) {
1307
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
48 PetscCallCUBLAS(cublasXaxpy(cublasv2handle,n2,&c[j],d_Apowers[(j+1)/2-1],one,d_P,one));
1308
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
48 PetscCallCUBLAS(cublasXaxpy(cublasv2handle,n2,&c[j-1],d_Apowers[(j+1)/2-1],one,d_Q,one));
1309
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
48 PetscCall(PetscLogGpuFlops(4.0*n*n));
1310 }
1311
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
16 PetscCallCUBLAS(cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n_,n_,n_,&sone,d_Apowers[0],n_,d_P,n_,&szero,d_W,n_));
1312
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 PetscCall(PetscLogGpuFlops(2.0*n*n*n));
1313 48 SlepcSwap(d_P,d_W,aux);
1314 break;
1315 32 case 13:
1316 /* P = A*(Apowers[3]*(c[13]*Apowers[3] + c[11]*Apowers[2] + c[9]*Apowers[1])
1317 + c[7]*Apowers[3] + c[5]*Apowers[2] + c[3]*Apowers[1] + c[1]*I) */
1318
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
32 PetscCallCUDA(cudaMemcpy(d_P,d_Apowers[3],n2*sizeof(PetscScalar),cudaMemcpyDeviceToDevice));
1319
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
32 PetscCallCUBLAS(cublasXscal(cublasv2handle,n2,&c[13],d_P,one));
1320
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
32 PetscCallCUBLAS(cublasXaxpy(cublasv2handle,n2,&c[11],d_Apowers[2],one,d_P,one));
1321
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
32 PetscCallCUBLAS(cublasXaxpy(cublasv2handle,n2,&c[9],d_Apowers[1],one,d_P,one));
1322
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
32 PetscCallCUBLAS(cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n_,n_,n_,&sone,d_Apowers[3],n_,d_P,n_,&szero,d_W,n_));
1323
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
32 PetscCall(PetscLogGpuFlops(5.0*n*n+2.0*n*n*n));
1324
1325
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
32 PetscCallCUDA(cudaMemset(d_P,0,sizeof(PetscScalar)*n2));
1326
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
32 PetscCall(set_diagonal(n,d_P,n,c[1]));
1327
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
32 PetscCallCUBLAS(cublasXaxpy(cublasv2handle,n2,&c[7],d_Apowers[3],one,d_P,one));
1328
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
32 PetscCallCUBLAS(cublasXaxpy(cublasv2handle,n2,&c[5],d_Apowers[2],one,d_P,one));
1329
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
32 PetscCallCUBLAS(cublasXaxpy(cublasv2handle,n2,&c[3],d_Apowers[1],one,d_P,one));
1330
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
32 PetscCallCUBLAS(cublasXaxpy(cublasv2handle,n2,&sone,d_P,one,d_W,one));
1331
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
32 PetscCallCUBLAS(cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n_,n_,n_,&sone,d_Apowers[0],n_,d_W,n_,&szero,d_P,n_));
1332
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
32 PetscCall(PetscLogGpuFlops(7.0*n*n+2.0*n*n*n));
1333 /* Q = Apowers[3]*(c[12]*Apowers[3] + c[10]*Apowers[2] + c[8]*Apowers[1])
1334 + c[6]*Apowers[3] + c[4]*Apowers[2] + c[2]*Apowers[1] + c[0]*I */
1335
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
32 PetscCallCUDA(cudaMemcpy(d_Q,d_Apowers[3],n2*sizeof(PetscScalar),cudaMemcpyDeviceToDevice));
1336
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
32 PetscCallCUBLAS(cublasXscal(cublasv2handle,n2,&c[12],d_Q,one));
1337
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
32 PetscCallCUBLAS(cublasXaxpy(cublasv2handle,n2,&c[10],d_Apowers[2],one,d_Q,one));
1338
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
32 PetscCallCUBLAS(cublasXaxpy(cublasv2handle,n2,&c[8],d_Apowers[1],one,d_Q,one));
1339
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
32 PetscCallCUBLAS(cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n_,n_,n_,&sone,d_Apowers[3],n_,d_Q,n_,&szero,d_W,n_));
1340
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
32 PetscCall(PetscLogGpuFlops(5.0*n*n+2.0*n*n*n));
1341
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
32 PetscCallCUDA(cudaMemset(d_Q,0,sizeof(PetscScalar)*n2));
1342
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
32 PetscCall(set_diagonal(n,d_Q,n,c[0]));
1343
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
32 PetscCallCUBLAS(cublasXaxpy(cublasv2handle,n2,&c[6],d_Apowers[3],one,d_Q,one));
1344
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
32 PetscCallCUBLAS(cublasXaxpy(cublasv2handle,n2,&c[4],d_Apowers[2],one,d_Q,one));
1345
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
32 PetscCallCUBLAS(cublasXaxpy(cublasv2handle,n2,&c[2],d_Apowers[1],one,d_Q,one));
1346
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
32 PetscCallCUBLAS(cublasXaxpy(cublasv2handle,n2,&sone,d_W,one,d_Q,one));
1347
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
32 PetscCall(PetscLogGpuFlops(7.0*n*n));
1348 break;
1349 default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong value of m %" PetscInt_FMT,m);
1350 }
1351
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
48 PetscCallCUBLAS(cublasXaxpy(cublasv2handle,n2,&smone,d_P,one,d_Q,one));
1352
1353
2/6
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
48 PetscCallMAGMA(magma_xgesv_gpu,n_,n_,d_Q,n_,ipiv,d_P,n_);
1354
1355
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
48 PetscCallCUBLAS(cublasXscal(cublasv2handle,n2,&stwo,d_P,one));
1356
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
48 PetscCall(shift_diagonal(n,d_P,n,sone));
1357
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
48 PetscCall(PetscLogGpuFlops(2.0*n*n*n/3.0+4.0*n*n));
1358
1359 /* Squaring */
1360
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
64 for (j=1;j<=s;j++) {
1361
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
16 PetscCallCUBLAS(cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n_,n_,n_,&sone,d_P,n_,d_P,n_,&szero,d_W,n_));
1362 16 SlepcSwap(d_P,d_W,aux);
1363 }
1364
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
48 PetscCall(PetscLogGpuFlops(2.0*n*n*n*s));
1365
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
48 PetscCall(PetscLogGpuTimeEnd());
1366
1367
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
48 PetscCall(PetscFree2(work,ipiv));
1368
3/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
48 if (d_P!=d_Ba) PetscCallCUDA(cudaMemcpy(d_Ba,d_P,n2*sizeof(PetscScalar),cudaMemcpyDeviceToDevice));
1369
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
48 if (A!=B) {
1370
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
44 if (s>0.5) { /* undo scaling */
1371 12 scale = 1.0/PetscPowRealInt(2.0,-s);
1372
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
12 PetscCallCUBLAS(cublasXscal(cublasv2handle,n2,&scale,d_Apowers[0],one));
1373 }
1374
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
44 PetscCall(MatDenseCUDARestoreArrayRead(A,(const PetscScalar**)&d_Apowers[0]));
1375 }
1376
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
48 PetscCall(MatDenseCUDARestoreArrayWrite(B,&d_Ba));
1377
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
48 PetscCallCUDA(cudaFree(d_work));
1378 PetscFunctionReturn(PETSC_SUCCESS);
1379 }
1380
1381 /*
1382 * Matrix exponential implementation based on algorithm and matlab code by Stefan Guettel
1383 * and Yuji Nakatsukasa
1384 *
1385 * Stefan Guettel and Yuji Nakatsukasa, "Scaled and Squared Subdiagonal Pade'
1386 * Approximation for the Matrix Exponential",
1387 * SIAM J. Matrix Anal. Appl. 37(1):145-170, 2016.
1388 * https://doi.org/10.1137/15M1027553
1389 */
1390 32 PetscErrorCode FNEvaluateFunctionMat_Exp_GuettelNakatsukasa_CUDAm(FN fn,Mat A,Mat B)
1391 {
1392 32 PetscInt i,j,n_,s,k,m,mod;
1393 32 PetscBLASInt n=0,n2=0,irsize=0,rsizediv2,ipsize=0,iremainsize=0,query=-1,*piv,minlen,lwork=0,one=1;
1394 32 PetscReal nrm,shift=0.0,rone=1.0,rzero=0.0;
1395 #if defined(PETSC_USE_COMPLEX)
1396 16 PetscReal *rwork=NULL;
1397 #endif
1398 32 PetscComplex *d_As,*d_RR,*d_RR2,*d_expmA,*d_expmA2,*d_Maux,*d_Maux2,rsize,*r,psize,*p,remainsize,*remainterm,*rootp,*rootq,mult=0.0,scale,cone=1.0,czero=0.0,*aux;
1399 32 PetscScalar *d_Aa,*d_Ba,*d_Ba2,*Maux,*d_sMaux,*wr,*wi,expshift,sone=1.0,szero=0.0,*work,work1,*saux;
1400 32 const PetscScalar *Aa;
1401 32 PetscBool isreal,*d_isreal,flg;
1402 32 cublasHandle_t cublasv2handle;
1403
1404 32 PetscFunctionBegin;
1405
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
32 PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA)); /* For CUDA event timers */
1406
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
32 PetscCall(PetscCUBLASGetHandle(&cublasv2handle));
1407
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
32 PetscCall(SlepcMagmaInit());
1408
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
32 PetscCall(MatGetSize(A,&n_,NULL));
1409
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
32 PetscCall(PetscBLASIntCast(n_,&n));
1410
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
32 PetscCall(PetscBLASIntCast(n*n,&n2));
1411
1412
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
32 if (A==B) {
1413
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
8 PetscCallCUDA(cudaMalloc((void **)&d_Aa,sizeof(PetscScalar)*n2));
1414
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
8 PetscCall(MatDenseCUDAGetArrayRead(A,&Aa));
1415
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
8 PetscCallCUDA(cudaMemcpy(d_Aa,Aa,sizeof(PetscScalar)*n2,cudaMemcpyDeviceToDevice));
1416
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
8 PetscCall(MatDenseCUDARestoreArrayRead(A,&Aa));
1417
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
24 } else PetscCall(MatDenseCUDAGetArrayRead(A,(const PetscScalar**)&d_Aa));
1418
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
32 PetscCall(MatDenseCUDAGetArrayWrite(B,&d_Ba));
1419 32 d_Ba2 = d_Ba;
1420
1421
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
32 PetscCallCUDA(cudaMalloc((void **)&d_isreal,sizeof(PetscBool)));
1422
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
32 PetscCallCUDA(cudaMalloc((void **)&d_sMaux,sizeof(PetscScalar)*n2));
1423
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
32 PetscCallCUDA(cudaMalloc((void **)&d_Maux,sizeof(PetscComplex)*n2));
1424
1425
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
32 PetscCall(PetscLogGpuTimeBegin());
1426 32 d_Maux2 = d_Maux;
1427
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
32 PetscCall(PetscOptionsGetReal(NULL,NULL,"-fn_expm_estimated_eig",&shift,&flg));
1428
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
32 if (!flg) {
1429
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
32 PetscCall(PetscMalloc2(n,&wr,n,&wi));
1430 /* estimate rightmost eigenvalue and shift A with it */
1431
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
32 PetscCall(PetscMalloc1(n2,&Maux));
1432
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
32 PetscCall(MatDenseGetArrayRead(A,&Aa));
1433
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
32 PetscCall(PetscArraycpy(Maux,Aa,n2));
1434
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
32 PetscCall(MatDenseRestoreArrayRead(A,&Aa));
1435 #if !defined(PETSC_USE_COMPLEX)
1436
2/6
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 1 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
16 PetscCallMAGMA(magma_xgeev,MagmaNoVec,MagmaNoVec,n,Maux,n,wr,wi,NULL,n,NULL,n,&work1,query);
1437
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
16 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(work1),&lwork));
1438
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
16 PetscCall(PetscMalloc1(lwork,&work));
1439
2/6
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 1 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
16 PetscCallMAGMA(magma_xgeev,MagmaNoVec,MagmaNoVec,n,Maux,n,wr,wi,NULL,n,NULL,n,work,lwork);
1440
2/4
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 1 times.
16 PetscCall(PetscFree(work));
1441 #else
1442
2/6
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 1 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
16 PetscCallMAGMA(magma_xgeev,MagmaNoVec,MagmaNoVec,n,Maux,n,wr,NULL,n,NULL,n,&work1,query,rwork);
1443
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
16 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(work1),&lwork));
1444
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
16 PetscCall(PetscMalloc2(2*n,&rwork,lwork,&work));
1445
2/6
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 1 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
16 PetscCallMAGMA(magma_xgeev,MagmaNoVec,MagmaNoVec,n,Maux,n,wr,NULL,n,NULL,n,work,lwork,rwork);
1446
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
16 PetscCall(PetscFree2(rwork,work));
1447 #endif
1448
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
32 PetscCall(PetscFree(Maux));
1449
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
32 PetscCall(PetscLogGpuFlops(25.0*n*n*n+(n*n*n)/3.0+1.0*n*n*n));
1450
1451 32 shift = PetscRealPart(wr[0]);
1452
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
320 for (i=1;i<n;i++) {
1453
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 2 times.
288 if (PetscRealPart(wr[i]) > shift) shift = PetscRealPart(wr[i]);
1454 }
1455
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
32 PetscCall(PetscFree2(wr,wi));
1456 }
1457 /* shift so that largest real part is (about) 0 */
1458
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
32 PetscCallCUDA(cudaMemcpy(d_sMaux,d_Aa,sizeof(PetscScalar)*n2,cudaMemcpyDeviceToDevice));
1459
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
32 if (shift) {
1460
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
32 PetscCall(shift_diagonal(n,d_sMaux,n,-shift));
1461
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
32 PetscCall(PetscLogGpuFlops(1.0*n));
1462 }
1463 #if defined(PETSC_USE_COMPLEX)
1464
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
16 PetscCallCUDA(cudaMemcpy(d_Maux,d_Aa,sizeof(PetscScalar)*n2,cudaMemcpyDeviceToDevice));
1465
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
16 if (shift) {
1466
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
16 PetscCall(shift_diagonal(n,d_Maux,n,-shift));
1467
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
16 PetscCall(PetscLogGpuFlops(1.0*n));
1468 }
1469 #endif
1470
3/4
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
32 if (A!=B) PetscCall(MatDenseCUDARestoreArrayRead(A,(const PetscScalar**)&d_Aa));
1471
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
8 else PetscCallCUDA(cudaFree(d_Aa));
1472
1473 /* estimate norm(A) and select the scaling factor */
1474
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
32 PetscCallCUBLAS(cublasXnrm2(cublasv2handle,n2,d_sMaux,one,&nrm));
1475
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
32 PetscCall(PetscLogGpuFlops(2.0*n*n));
1476
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
32 PetscCall(sexpm_params(nrm,&s,&k,&m));
1477
1/6
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
32 if (s==0 && k==1 && m==0) { /* exp(A) = I+A to eps! */
1478 if (shift) expshift = PetscExpReal(shift);
1479 PetscCall(shift_Cdiagonal(n,d_Maux,n,rone,rzero));
1480 if (shift) {
1481 PetscCallCUBLAS(cublasXscal(cublasv2handle,n2,&expshift,d_sMaux,one));
1482 PetscCall(PetscLogGpuFlops(1.0*(n+n2)));
1483 } else PetscCall(PetscLogGpuFlops(1.0*n));
1484 PetscCallCUDA(cudaMemcpy(d_Ba,d_sMaux,sizeof(PetscScalar)*n2,cudaMemcpyDeviceToDevice));
1485 PetscCallCUDA(cudaFree(d_isreal));
1486 PetscCallCUDA(cudaFree(d_sMaux));
1487 PetscCallCUDA(cudaFree(d_Maux));
1488 PetscCall(MatDenseCUDARestoreArrayWrite(B,&d_Ba));
1489 PetscFunctionReturn(PETSC_SUCCESS); /* quick return */
1490 }
1491
1492
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
32 PetscCallCUDA(cudaMalloc((void **)&d_expmA,sizeof(PetscComplex)*n2));
1493
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
32 PetscCallCUDA(cudaMalloc((void **)&d_As,sizeof(PetscComplex)*n2));
1494
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
32 PetscCallCUDA(cudaMalloc((void **)&d_RR,sizeof(PetscComplex)*n2));
1495 32 d_expmA2 = d_expmA; d_RR2 = d_RR;
1496
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
32 PetscCall(PetscMalloc1(n,&piv));
1497 /* scale matrix */
1498 #if !defined(PETSC_USE_COMPLEX)
1499
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
16 PetscCall(copy_array2D_S2C(n,n,d_As,n,d_sMaux,n));
1500 #else
1501
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
16 PetscCallCUDA(cudaMemcpy(d_As,d_sMaux,sizeof(PetscScalar)*n2,cudaMemcpyDeviceToDevice));
1502 #endif
1503 32 scale = 1.0/PetscPowRealInt(2.0,s);
1504
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
32 PetscCallCUBLAS(cublasXCscal(cublasv2handle,n2,(const cuComplex *)&scale,(cuComplex *)d_As,one));
1505
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
32 PetscCall(SlepcLogGpuFlopsComplex(1.0*n2));
1506
1507 /* evaluate Pade approximant (partial fraction or product form) */
1508
3/4
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
32 if (fn->method==3 || !m) { /* partial fraction */
1509
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 PetscCall(getcoeffs(k,m,&rsize,&psize,&remainsize,PETSC_TRUE));
1510
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPartComplex(rsize),&irsize));
1511
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPartComplex(psize),&ipsize));
1512
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPartComplex(remainsize),&iremainsize));
1513
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 PetscCall(PetscMalloc3(irsize,&r,ipsize,&p,iremainsize,&remainterm));
1514
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 PetscCall(getcoeffs(k,m,r,p,remainterm,PETSC_FALSE));
1515
1516
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
16 PetscCallCUDA(cudaMemset(d_expmA,0,sizeof(PetscComplex)*n2));
1517 #if !defined(PETSC_USE_COMPLEX)
1518 8 isreal = PETSC_TRUE;
1519 #else
1520
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
8 PetscCall(getisreal_array2D(n,n,d_Maux,n,d_isreal));
1521
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
8 PetscCallCUDA(cudaMemcpy(&isreal,d_isreal,sizeof(PetscBool),cudaMemcpyDeviceToHost));
1522 #endif
1523
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
16 if (isreal) {
1524 16 rsizediv2 = irsize/2;
1525
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
48 for (i=0;i<rsizediv2;i++) { /* use partial fraction to get R(As) */
1526
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
32 PetscCallCUDA(cudaMemcpy(d_Maux,d_As,sizeof(PetscComplex)*n2,cudaMemcpyDeviceToDevice));
1527
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
32 PetscCallCUDA(cudaMemset(d_RR,0,sizeof(PetscComplex)*n2));
1528
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
32 PetscCall(shift_Cdiagonal(n,d_Maux,n,-PetscRealPartComplex(p[2*i]),-PetscImaginaryPartComplex(p[2*i])));
1529
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
32 PetscCall(set_Cdiagonal(n,d_RR,n,PetscRealPartComplex(r[2*i]),PetscImaginaryPartComplex(r[2*i])));
1530
2/6
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
32 PetscCallMAGMA(magma_Cgesv_gpu,n,n,d_Maux,n,piv,d_RR,n);
1531
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
32 PetscCall(add_array2D_Conj(n,n,d_RR,n));
1532
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
32 PetscCallCUBLAS(cublasXCaxpy(cublasv2handle,n2,&cone,d_RR,one,d_expmA,one));
1533 /* shift(n) + gesv + axpy(n2) */
1534
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
32 PetscCall(SlepcLogGpuFlopsComplex(1.0*n+(2.0*n*n*n/3.0+2.0*n*n*n)+2.0*n2));
1535 }
1536
1537 16 mod = ipsize % 2;
1538
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
16 if (mod) {
1539 PetscCallCUDA(cudaMemcpy(d_Maux,d_As,sizeof(PetscComplex)*n2,cudaMemcpyDeviceToDevice));
1540 PetscCallCUDA(cudaMemset(d_RR,0,sizeof(PetscComplex)*n2));
1541 PetscCall(shift_Cdiagonal(n,d_Maux,n,-PetscRealPartComplex(p[ipsize-1]),-PetscImaginaryPartComplex(p[ipsize-1])));
1542 PetscCall(set_Cdiagonal(n,d_RR,n,PetscRealPartComplex(r[irsize-1]),PetscImaginaryPartComplex(r[irsize-1])));
1543 PetscCallMAGMA(magma_Cgesv_gpu,n,n,d_Maux,n,piv,d_RR,n);
1544 PetscCallCUBLAS(cublasXCaxpy(cublasv2handle,n2,&cone,d_RR,one,d_expmA,one));
1545 PetscCall(SlepcLogGpuFlopsComplex(1.0*n+(2.0*n*n*n/3.0+2.0*n*n*n)+1.0*n2));
1546 }
1547 } else { /* complex */
1548 for (i=0;i<irsize;i++) { /* use partial fraction to get R(As) */
1549 PetscCallCUDA(cudaMemcpy(d_Maux,d_As,sizeof(PetscComplex)*n2,cudaMemcpyDeviceToDevice));
1550 PetscCallCUDA(cudaMemset(d_RR,0,sizeof(PetscComplex)*n2));
1551 PetscCall(shift_Cdiagonal(n,d_Maux,n,-PetscRealPartComplex(p[i]),-PetscImaginaryPartComplex(p[i])));
1552 PetscCall(set_Cdiagonal(n,d_RR,n,PetscRealPartComplex(r[i]),PetscImaginaryPartComplex(r[i])));
1553 PetscCallMAGMA(magma_Cgesv_gpu,n,n,d_Maux,n,piv,d_RR,n);
1554 PetscCallCUBLAS(cublasXCaxpy(cublasv2handle,n2,&cone,d_RR,one,d_expmA,one));
1555 PetscCall(SlepcLogGpuFlopsComplex(1.0*n+(2.0*n*n*n/3.0+2.0*n*n*n)+1.0*n2));
1556 }
1557 }
1558
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
48 for (i=0;i<iremainsize;i++) {
1559
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
32 if (!i) {
1560
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
16 PetscCallCUDA(cudaMemset(d_RR,0,sizeof(PetscComplex)*n2));
1561
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 PetscCall(set_Cdiagonal(n,d_RR,n,PetscRealPartComplex(remainterm[iremainsize-1]),PetscImaginaryPartComplex(remainterm[iremainsize-1])));
1562 } else {
1563
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
16 PetscCallCUDA(cudaMemcpy(d_RR,d_As,sizeof(PetscComplex)*n2,cudaMemcpyDeviceToDevice));
1564
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 for (j=1;j<i;j++) {
1565 PetscCallCUBLAS(cublasXCgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n,n,n,&cone,d_RR,n,d_RR,n,&czero,d_Maux,n));
1566 SlepcSwap(d_RR,d_Maux,aux);
1567 PetscCall(SlepcLogGpuFlopsComplex(2.0*n*n*n));
1568 }
1569
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
16 PetscCallCUBLAS(cublasXCscal(cublasv2handle,n2,&remainterm[iremainsize-1-i],d_RR,one));
1570
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 PetscCall(SlepcLogGpuFlopsComplex(1.0*n2));
1571 }
1572
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
32 PetscCallCUBLAS(cublasXCaxpy(cublasv2handle,n2,&cone,d_RR,one,d_expmA,one));
1573
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
32 PetscCall(SlepcLogGpuFlopsComplex(1.0*n2));
1574 }
1575
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 PetscCall(PetscFree3(r,p,remainterm));
1576 } else { /* product form, default */
1577
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 PetscCall(getcoeffsproduct(k,m,&rsize,&psize,&mult,PETSC_TRUE));
1578
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPartComplex(rsize),&irsize));
1579
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPartComplex(psize),&ipsize));
1580
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 PetscCall(PetscMalloc2(irsize,&rootp,ipsize,&rootq));
1581
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 PetscCall(getcoeffsproduct(k,m,rootp,rootq,&mult,PETSC_FALSE));
1582
1583
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
16 PetscCallCUDA(cudaMemset(d_expmA,0,sizeof(PetscComplex)*n2));
1584
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 PetscCall(set_Cdiagonal(n,d_expmA,n,rone,rzero)); /* initialize */
1585 16 minlen = PetscMin(irsize,ipsize);
1586
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
80 for (i=0;i<minlen;i++) {
1587
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
64 PetscCallCUDA(cudaMemcpy(d_RR,d_As,sizeof(PetscComplex)*n2,cudaMemcpyDeviceToDevice));
1588
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
64 PetscCall(shift_Cdiagonal(n,d_RR,n,-PetscRealPartComplex(rootp[i]),-PetscImaginaryPartComplex(rootp[i])));
1589
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
64 PetscCallCUBLAS(cublasXCgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n,n,n,&cone,d_RR,n,d_expmA,n,&czero,d_Maux,n));
1590 64 SlepcSwap(d_expmA,d_Maux,aux);
1591
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
64 PetscCallCUDA(cudaMemcpy(d_RR,d_As,sizeof(PetscComplex)*n2,cudaMemcpyDeviceToDevice));
1592
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
64 PetscCall(shift_Cdiagonal(n,d_RR,n,-PetscRealPartComplex(rootq[i]),-PetscImaginaryPartComplex(rootq[i])));
1593
2/6
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
64 PetscCallMAGMA(magma_Cgesv_gpu,n,n,d_RR,n,piv,d_expmA,n);
1594 /* shift(n) + gemm + shift(n) + gesv */
1595
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
64 PetscCall(SlepcLogGpuFlopsComplex(1.0*n+(2.0*n*n*n)+1.0*n+(2.0*n*n*n/3.0+2.0*n*n*n)));
1596 }
1597 /* extra enumerator */
1598
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
32 for (i=minlen;i<irsize;i++) {
1599
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
16 PetscCallCUDA(cudaMemcpy(d_RR,d_As,sizeof(PetscComplex)*n2,cudaMemcpyDeviceToDevice));
1600
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 PetscCall(shift_Cdiagonal(n,d_RR,n,-PetscRealPartComplex(rootp[i]),-PetscImaginaryPartComplex(rootp[i])));
1601
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
16 PetscCallCUBLAS(cublasXCgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n,n,n,&cone,d_RR,n,d_expmA,n,&czero,d_Maux,n));
1602 16 SlepcSwap(d_expmA,d_Maux,aux);
1603
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 PetscCall(SlepcLogGpuFlopsComplex(1.0*n+2.0*n*n*n));
1604 }
1605 /* extra denominator */
1606
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 for (i=minlen;i<ipsize;i++) {
1607 PetscCallCUDA(cudaMemcpy(d_RR,d_As,sizeof(PetscComplex)*n2,cudaMemcpyDeviceToDevice));
1608 PetscCall(shift_Cdiagonal(n,d_RR,n,-PetscRealPartComplex(rootq[i]),-PetscImaginaryPartComplex(rootq[i])));
1609 PetscCallMAGMA(magma_Cgesv_gpu,n,n,d_RR,n,piv,d_expmA,n);
1610 PetscCall(SlepcLogGpuFlopsComplex(1.0*n+(2.0*n*n*n/3.0+2.0*n*n*n)));
1611 }
1612
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
16 PetscCallCUBLAS(cublasXCscal(cublasv2handle,n2,&mult,d_expmA,one));
1613
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 PetscCall(SlepcLogGpuFlopsComplex(1.0*n2));
1614
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
16 PetscCall(PetscFree2(rootp,rootq));
1615 }
1616
1617 #if !defined(PETSC_USE_COMPLEX)
1618
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
16 PetscCall(copy_array2D_C2S(n,n,d_Ba2,n,d_expmA,n));
1619 #else
1620
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
16 PetscCallCUDA(cudaMemcpy(d_Ba2,d_expmA,sizeof(PetscScalar)*n2,cudaMemcpyDeviceToDevice));
1621 #endif
1622
1623 /* perform repeated squaring */
1624
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
160 for (i=0;i<s;i++) { /* final squaring */
1625
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
128 PetscCallCUBLAS(cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n,n,n,&sone,d_Ba2,n,d_Ba2,n,&szero,d_sMaux,n));
1626 128 SlepcSwap(d_Ba2,d_sMaux,saux);
1627
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
128 PetscCall(PetscLogGpuFlops(2.0*n*n*n));
1628 }
1629
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
32 if (d_Ba2!=d_Ba) {
1630 PetscCallCUDA(cudaMemcpy(d_Ba,d_Ba2,sizeof(PetscScalar)*n2,cudaMemcpyDeviceToDevice));
1631 d_sMaux = d_Ba2;
1632 }
1633
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
32 if (shift) {
1634 32 expshift = PetscExpReal(shift);
1635
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
32 PetscCallCUBLAS(cublasXscal(cublasv2handle,n2,&expshift,d_Ba,one));
1636
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
32 PetscCall(PetscLogGpuFlops(1.0*n2));
1637 }
1638
1639
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
32 PetscCall(PetscLogGpuTimeEnd());
1640
1641 /* restore pointers */
1642 32 d_Maux = d_Maux2; d_expmA = d_expmA2; d_RR = d_RR2;
1643
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
32 PetscCall(MatDenseCUDARestoreArrayWrite(B,&d_Ba));
1644
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
32 PetscCallCUDA(cudaFree(d_isreal));
1645
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
32 PetscCallCUDA(cudaFree(d_sMaux));
1646
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
32 PetscCallCUDA(cudaFree(d_Maux));
1647
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
32 PetscCallCUDA(cudaFree(d_expmA));
1648
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
32 PetscCallCUDA(cudaFree(d_As));
1649
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
32 PetscCallCUDA(cudaFree(d_RR));
1650
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
32 PetscCall(PetscFree(piv));
1651 PetscFunctionReturn(PETSC_SUCCESS);
1652 }
1653 #endif /* PETSC_HAVE_MAGMA */
1654 #endif /* PETSC_HAVE_CUDA */
1655
1656 319 static PetscErrorCode FNView_Exp(FN fn,PetscViewer viewer)
1657 {
1658 319 PetscBool isascii;
1659 319 char str[50];
1660 319 const char *methodname[] = {
1661 "scaling & squaring, [m/m] Pade approximant (Higham)",
1662 "scaling & squaring, [6/6] Pade approximant",
1663 "scaling & squaring, subdiagonal Pade approximant (product form)",
1664 "scaling & squaring, subdiagonal Pade approximant (partial fraction)"
1665 };
1666 319 const int nmeth=PETSC_STATIC_ARRAY_LENGTH(methodname);
1667
1668
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
319 PetscFunctionBegin;
1669
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
319 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii));
1670
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
319 if (isascii) {
1671
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
319 if (fn->beta==(PetscScalar)1.0) {
1672
6/8
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
269 if (fn->alpha==(PetscScalar)1.0) PetscCall(PetscViewerASCIIPrintf(viewer," exponential: exp(x)\n"));
1673 else {
1674
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
105 PetscCall(SlepcSNPrintfScalar(str,sizeof(str),fn->alpha,PETSC_TRUE));
1675
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
105 PetscCall(PetscViewerASCIIPrintf(viewer," exponential: exp(%s*x)\n",str));
1676 }
1677 } else {
1678
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
50 PetscCall(SlepcSNPrintfScalar(str,sizeof(str),fn->beta,PETSC_TRUE));
1679
1/8
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
50 if (fn->alpha==(PetscScalar)1.0) PetscCall(PetscViewerASCIIPrintf(viewer," exponential: %s*exp(x)\n",str));
1680 else {
1681
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
50 PetscCall(PetscViewerASCIIPrintf(viewer," exponential: %s",str));
1682
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
50 PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_FALSE));
1683
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
50 PetscCall(SlepcSNPrintfScalar(str,sizeof(str),fn->alpha,PETSC_TRUE));
1684
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
50 PetscCall(PetscViewerASCIIPrintf(viewer,"*exp(%s*x)\n",str));
1685
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
50 PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_TRUE));
1686 }
1687 }
1688
5/8
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
319 if (fn->method<nmeth) PetscCall(PetscViewerASCIIPrintf(viewer," computing matrix functions with: %s\n",methodname[fn->method]));
1689 }
1690
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
59 PetscFunctionReturn(PETSC_SUCCESS);
1691 }
1692
1693 1106 SLEPC_EXTERN PetscErrorCode FNCreate_Exp(FN fn)
1694 {
1695
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
1106 PetscFunctionBegin;
1696 1106 fn->ops->evaluatefunction = FNEvaluateFunction_Exp;
1697 1106 fn->ops->evaluatederivative = FNEvaluateDerivative_Exp;
1698 1106 fn->ops->evaluatefunctionmat[0] = FNEvaluateFunctionMat_Exp_Higham;
1699 1106 fn->ops->evaluatefunctionmat[1] = FNEvaluateFunctionMat_Exp_Pade;
1700 1106 fn->ops->evaluatefunctionmat[2] = FNEvaluateFunctionMat_Exp_GuettelNakatsukasa; /* product form */
1701 1106 fn->ops->evaluatefunctionmat[3] = FNEvaluateFunctionMat_Exp_GuettelNakatsukasa; /* partial fraction */
1702 #if defined(PETSC_HAVE_CUDA)
1703 245 fn->ops->evaluatefunctionmatcuda[1] = FNEvaluateFunctionMat_Exp_Pade_CUDA;
1704 #if defined(PETSC_HAVE_MAGMA)
1705 245 fn->ops->evaluatefunctionmatcuda[0] = FNEvaluateFunctionMat_Exp_Higham_CUDAm;
1706 245 fn->ops->evaluatefunctionmatcuda[1] = FNEvaluateFunctionMat_Exp_Pade_CUDAm;
1707 245 fn->ops->evaluatefunctionmatcuda[2] = FNEvaluateFunctionMat_Exp_GuettelNakatsukasa_CUDAm; /* product form */
1708 245 fn->ops->evaluatefunctionmatcuda[3] = FNEvaluateFunctionMat_Exp_GuettelNakatsukasa_CUDAm; /* partial fraction */
1709 #endif
1710 #endif
1711 1106 fn->ops->view = FNView_Exp;
1712
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
1106 PetscFunctionReturn(PETSC_SUCCESS);
1713 }
1714