GCC Code Coverage Report


Directory: ./
File: src/sys/classes/fn/impls/fnutil.c
Date: 2025-10-04 04:19:13
Exec Total Coverage
Lines: 396 399 99.2%
Functions: 8 8 100.0%
Branches: 737 1546 47.7%

Line Branch Exec Source
1 /*
2 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3 SLEPc - Scalable Library for Eigenvalue Problem Computations
4 Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
5
6 This file is part of SLEPc.
7 SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9 */
10 /*
11 Utility subroutines common to several impls
12 */
13
14 #include <slepc/private/fnimpl.h> /*I "slepcfn.h" I*/
15 #include <slepcblaslapack.h>
16
17 /*
18 Compute the square root of an upper quasi-triangular matrix T,
19 using Higham's algorithm (LAA 88, 1987). T is overwritten with sqrtm(T).
20 */
21 1166 static PetscErrorCode SlepcMatDenseSqrt(PetscBLASInt n,PetscScalar *T,PetscBLASInt ld)
22 {
23 1166 PetscScalar one=1.0,mone=-1.0;
24 1166 PetscReal scal;
25 1166 PetscBLASInt i,j,si,sj,r,ione=1,info;
26 #if !defined(PETSC_USE_COMPLEX)
27 514 PetscReal alpha,theta,mu,mu2;
28 #endif
29
30
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
1166 PetscFunctionBegin;
31
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
42432 for (j=0;j<n;j++) {
32 #if defined(PETSC_USE_COMPLEX)
33 24539 sj = 1;
34 24539 T[j+j*ld] = PetscSqrtScalar(T[j+j*ld]);
35 #else
36
4/4
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 5 times.
✓ Branch 3 taken 5 times.
16727 sj = (j==n-1 || T[j+1+j*ld] == 0.0)? 1: 2;
37
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
16727 if (sj==1) {
38
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
14207 PetscCheck(T[j+j*ld]>=0.0,PETSC_COMM_SELF,PETSC_ERR_USER_INPUT,"Matrix has a real negative eigenvalue, no real primary square root exists");
39 14207 T[j+j*ld] = PetscSqrtReal(T[j+j*ld]);
40 } else {
41 /* square root of 2x2 block */
42 2520 theta = (T[j+j*ld]+T[j+1+(j+1)*ld])/2.0;
43 2520 mu = (T[j+j*ld]-T[j+1+(j+1)*ld])/2.0;
44 2520 mu2 = -mu*mu-T[j+1+j*ld]*T[j+(j+1)*ld];
45 2520 mu = PetscSqrtReal(mu2);
46
1/2
✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
2520 if (theta>0.0) alpha = PetscSqrtReal((theta+PetscSqrtReal(theta*theta+mu2))/2.0);
47 else alpha = mu/PetscSqrtReal(2.0*(-theta+PetscSqrtReal(theta*theta+mu2)));
48 2520 T[j+j*ld] /= 2.0*alpha;
49 2520 T[j+1+(j+1)*ld] /= 2.0*alpha;
50 2520 T[j+(j+1)*ld] /= 2.0*alpha;
51 2520 T[j+1+j*ld] /= 2.0*alpha;
52 2520 T[j+j*ld] += alpha-theta/(2.0*alpha);
53 2520 T[j+1+(j+1)*ld] += alpha-theta/(2.0*alpha);
54 }
55 #endif
56
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
1068808 for (i=j-1;i>=0;i--) {
57 #if defined(PETSC_USE_COMPLEX)
58 638365 si = 1;
59 #else
60
4/4
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 5 times.
✓ Branch 3 taken 5 times.
389177 si = (i==0 || T[i+(i-1)*ld] == 0.0)? 1: 2;
61
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
389177 if (si==2) i--;
62 #endif
63 /* solve Sylvester equation of order si x sj */
64 1027542 r = j-i-si;
65
12/22
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 8 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✓ Branch 9 taken 2 times.
✓ Branch 10 taken 2 times.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✓ Branch 13 taken 2 times.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✓ Branch 16 taken 2 times.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✓ Branch 19 taken 2 times.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
1027542 if (r) PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&si,&sj,&r,&mone,T+i+(i+si)*ld,&ld,T+i+si+j*ld,&ld,&one,T+i+j*ld,&ld));
66
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
1027542 PetscCallBLAS("LAPACKtrsyl",LAPACKtrsyl_("N","N",&ione,&si,&sj,T+i+i*ld,&ld,T+j+j*ld,&ld,T+i+j*ld,&ld,&scal,&info));
67
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
1027542 SlepcCheckLapackInfo("trsyl",info);
68
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
1027542 PetscCheck(scal==1.0,PETSC_COMM_SELF,PETSC_ERR_SUP,"Current implementation cannot handle scale factor %g",(double)scal);
69 }
70
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 10 times.
41266 if (sj==2) j++;
71 }
72
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
226 PetscFunctionReturn(PETSC_SUCCESS);
73 }
74
75 #define BLOCKSIZE 64
76
77 /*
78 Schur method for the square root of an upper quasi-triangular matrix T.
79 T is overwritten with sqrtm(T).
80 If firstonly then only the first column of T will contain relevant values.
81 */
82 700 PetscErrorCode FNSqrtmSchur(FN fn,PetscBLASInt n,PetscScalar *T,PetscBLASInt ld,PetscBool firstonly)
83 {
84 700 PetscBLASInt i,j,k,r,ione=1,sdim,lwork,*s,*p,info,bs=BLOCKSIZE;
85 700 PetscScalar *wr,*W,*Q,*work,one=1.0,zero=0.0,mone=-1.0;
86 700 PetscInt m,nblk;
87 700 PetscReal scal;
88 #if defined(PETSC_USE_COMPLEX)
89 385 PetscReal *rwork;
90 #else
91 315 PetscReal *wi;
92 #endif
93
94
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
700 PetscFunctionBegin;
95 700 m = n;
96 700 nblk = (m+bs-1)/bs;
97 700 lwork = 5*n;
98
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
700 k = firstonly? 1: n;
99
100 /* compute Schur decomposition A*Q = Q*T */
101 #if !defined(PETSC_USE_COMPLEX)
102
4/6
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 1 times.
315 PetscCall(PetscMalloc7(m,&wr,m,&wi,m*k,&W,m*m,&Q,lwork,&work,nblk,&s,nblk,&p));
103
10/20
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
315 PetscCallBLAS("LAPACKgees",LAPACKgees_("V","N",NULL,&n,T,&ld,&sdim,wr,wi,Q,&ld,work,&lwork,NULL,&info));
104 #else
105
4/6
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 1 times.
385 PetscCall(PetscMalloc7(m,&wr,m,&rwork,m*k,&W,m*m,&Q,lwork,&work,nblk,&s,nblk,&p));
106
10/20
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 1 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 1 times.
✓ Branch 8 taken 1 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 1 times.
✓ Branch 12 taken 1 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 1 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 1 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
385 PetscCallBLAS("LAPACKgees",LAPACKgees_("V","N",NULL,&n,T,&ld,&sdim,wr,Q,&ld,work,&lwork,rwork,NULL,&info));
107 #endif
108
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
700 SlepcCheckLapackInfo("gees",info);
109
110 /* determine block sizes and positions, to avoid cutting 2x2 blocks */
111 700 j = 0;
112 700 p[j] = 0;
113 1525 do {
114 1166 s[j] = PetscMin(bs,n-p[j]);
115 #if !defined(PETSC_USE_COMPLEX)
116
4/4
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 5 times.
✓ Branch 3 taken 5 times.
514 if (p[j]+s[j]!=n && T[p[j]+s[j]+(p[j]+s[j]-1)*ld]!=0.0) s[j]++;
117 #endif
118
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
1166 if (p[j]+s[j]==n) break;
119 466 j++;
120 466 p[j] = p[j-1]+s[j-1];
121 107 } while (1);
122 1866 nblk = j+1;
123
124
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
1866 for (j=0;j<nblk;j++) {
125 /* evaluate f(T_jj) */
126
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
1166 PetscCall(SlepcMatDenseSqrt(s[j],T+p[j]+p[j]*ld,ld));
127
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
1632 for (i=j-1;i>=0;i--) {
128 /* solve Sylvester equation for block (i,j) */
129 466 r = p[j]-p[i]-s[i];
130
1/22
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✗ Branch 11 not taken.
✗ Branch 12 not taken.
✗ Branch 13 not taken.
✗ Branch 14 not taken.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✗ Branch 17 not taken.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
✗ Branch 20 not taken.
✗ Branch 21 not taken.
466 if (r) PetscCallBLAS("BLASgemm",BLASgemm_("N","N",s+i,s+j,&r,&mone,T+p[i]+(p[i]+s[i])*ld,&ld,T+p[i]+s[i]+p[j]*ld,&ld,&one,T+p[i]+p[j]*ld,&ld));
131
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
466 PetscCallBLAS("LAPACKtrsyl",LAPACKtrsyl_("N","N",&ione,s+i,s+j,T+p[i]+p[i]*ld,&ld,T+p[j]+p[j]*ld,&ld,T+p[i]+p[j]*ld,&ld,&scal,&info));
132
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
466 SlepcCheckLapackInfo("trsyl",info);
133
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
466 PetscCheck(scal==1.0,PETSC_COMM_SELF,PETSC_ERR_SUP,"Current implementation cannot handle scale factor %g",(double)scal);
134 }
135 }
136
137 /* backtransform B = Q*T*Q' */
138
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
700 PetscCallBLAS("BLASgemm",BLASgemm_("N","C",&n,&k,&n,&one,T,&ld,Q,&ld,&zero,W,&ld));
139
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
700 PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&k,&n,&one,Q,&ld,W,&ld,&zero,T,&ld));
140
141 /* flop count: Schur decomposition, triangular square root, and backtransform */
142
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
700 PetscCall(PetscLogFlops(25.0*n*n*n+n*n*n/3.0+4.0*n*n*k));
143
144 #if !defined(PETSC_USE_COMPLEX)
145
4/6
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 1 times.
315 PetscCall(PetscFree7(wr,wi,W,Q,work,s,p));
146 #else
147
4/6
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 1 times.
385 PetscCall(PetscFree7(wr,rwork,W,Q,work,s,p));
148 #endif
149
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
134 PetscFunctionReturn(PETSC_SUCCESS);
150 }
151
152 #define DBMAXIT 25
153
154 /*
155 Computes the principal square root of the matrix T using the product form
156 of the Denman-Beavers iteration.
157 T is overwritten with sqrtm(T) or inv(sqrtm(T)) depending on flag inv.
158 */
159 246 PetscErrorCode FNSqrtmDenmanBeavers(FN fn,PetscBLASInt n,PetscScalar *T,PetscBLASInt ld,PetscBool inv)
160 {
161 246 PetscScalar *Told,*M=NULL,*invM,*work,work1,prod,alpha;
162 246 PetscScalar szero=0.0,sone=1.0,smone=-1.0,spfive=0.5,sp25=0.25;
163 246 PetscReal tol,Mres=0.0,detM,g,reldiff,fnormdiff,fnormT,rwork[1];
164 246 PetscBLASInt N,i,it,*piv=NULL,info,query=-1,lwork;
165 246 const PetscBLASInt one=1;
166 246 PetscBool converged=PETSC_FALSE,scale;
167 246 unsigned int ftz;
168
169
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
246 PetscFunctionBegin;
170 246 N = n*n;
171 246 tol = PetscSqrtReal((PetscReal)n)*PETSC_MACHINE_EPSILON/2;
172 246 scale = PetscDefined(USE_REAL_SINGLE)? PETSC_FALSE: PETSC_TRUE;
173
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
246 PetscCall(SlepcSetFlushToZero(&ftz));
174
175 /* query work size */
176
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
246 PetscCallBLAS("LAPACKgetri",LAPACKgetri_(&n,M,&ld,piv,&work1,&query,&info));
177
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
246 PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(work1),&lwork));
178
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
246 PetscCall(PetscMalloc5(lwork,&work,n,&piv,n*n,&Told,n*n,&M,n*n,&invM));
179
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
246 PetscCall(PetscArraycpy(M,T,n*n));
180
181
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
246 if (inv) { /* start recurrence with I instead of A */
182
4/6
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
120 PetscCall(PetscArrayzero(T,n*n));
183
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
1320 for (i=0;i<n;i++) T[i+i*ld] += 1.0;
184 }
185
186
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
1476 for (it=0;it<DBMAXIT && !converged;it++) {
187
188
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
1230 if (scale) { /* g = (abs(det(M)))^(-1/(2*n)) */
189
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
738 PetscCall(PetscArraycpy(invM,M,n*n));
190
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
738 PetscCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,invM,&ld,piv,&info));
191
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
738 SlepcCheckLapackInfo("getrf",info);
192 738 prod = invM[0];
193
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
39942 for (i=1;i<n;i++) prod *= invM[i+i*ld];
194 738 detM = PetscAbsScalar(prod);
195
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
738 g = (detM>PETSC_MAX_REAL)? 0.5: PetscPowReal(detM,-1.0/(2.0*n));
196 738 alpha = g;
197
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
738 PetscCallBLAS("BLASscal",BLASscal_(&N,&alpha,T,&one));
198 738 alpha = g*g;
199
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
738 PetscCallBLAS("BLASscal",BLASscal_(&N,&alpha,M,&one));
200
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
738 PetscCall(PetscLogFlops(2.0*n*n*n/3.0+2.0*n*n));
201 }
202
203
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
1230 PetscCall(PetscArraycpy(Told,T,n*n));
204
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
1230 PetscCall(PetscArraycpy(invM,M,n*n));
205
206
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
1230 PetscCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,invM,&ld,piv,&info));
207
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
1230 SlepcCheckLapackInfo("getrf",info);
208
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
1230 PetscCallBLAS("LAPACKgetri",LAPACKgetri_(&n,invM,&ld,piv,work,&lwork,&info));
209
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
1230 SlepcCheckLapackInfo("getri",info);
210
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
1230 PetscCall(PetscLogFlops(2.0*n*n*n/3.0+4.0*n*n*n/3.0));
211
212
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
71400 for (i=0;i<n;i++) invM[i+i*ld] += 1.0;
213
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
1230 PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&spfive,Told,&ld,invM,&ld,&szero,T,&ld));
214
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
71400 for (i=0;i<n;i++) invM[i+i*ld] -= 1.0;
215
216
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
1230 PetscCallBLAS("BLASaxpy",BLASaxpy_(&N,&sone,invM,&one,M,&one));
217
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
1230 PetscCallBLAS("BLASscal",BLASscal_(&N,&sp25,M,&one));
218
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
71400 for (i=0;i<n;i++) M[i+i*ld] -= 0.5;
219
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
1230 PetscCall(PetscLogFlops(2.0*n*n*n+2.0*n*n));
220
221 1230 Mres = LAPACKlange_("F",&n,&n,M,&n,rwork);
222
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
72630 for (i=0;i<n;i++) M[i+i*ld] += 1.0;
223
224
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
1230 if (scale) {
225 /* reldiff = norm(T - Told,'fro')/norm(T,'fro') */
226
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
738 PetscCallBLAS("BLASaxpy",BLASaxpy_(&N,&smone,T,&one,Told,&one));
227 738 fnormdiff = LAPACKlange_("F",&n,&n,Told,&n,rwork);
228 738 fnormT = LAPACKlange_("F",&n,&n,T,&n,rwork);
229
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
738 PetscCall(PetscLogFlops(7.0*n*n));
230 738 reldiff = fnormdiff/fnormT;
231
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
738 PetscCall(PetscInfo(fn,"it: %" PetscBLASInt_FMT " reldiff: %g scale: %g tol*scale: %g\n",it,(double)reldiff,(double)g,(double)(tol*g)));
232
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
738 if (reldiff<1e-2) scale = PETSC_FALSE; /* Switch off scaling */
233 }
234
235
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
1230 if (Mres<=tol) converged = PETSC_TRUE;
236 }
237
238
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
246 PetscCheck(Mres<=tol,PETSC_COMM_SELF,PETSC_ERR_LIB,"SQRTM not converged after %d iterations",DBMAXIT);
239
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
246 PetscCall(PetscFree5(work,piv,Told,M,invM));
240
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
73 PetscCall(SlepcResetFlushToZero(&ftz));
241
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
48 PetscFunctionReturn(PETSC_SUCCESS);
242 }
243
244 #define NSMAXIT 50
245
246 /*
247 Computes the principal square root of the matrix A using the Newton-Schulz iteration.
248 T is overwritten with sqrtm(T) or inv(sqrtm(T)) depending on flag inv.
249 */
250 246 PetscErrorCode FNSqrtmNewtonSchulz(FN fn,PetscBLASInt n,PetscScalar *A,PetscBLASInt ld,PetscBool inv)
251 {
252 246 PetscScalar *Y=A,*Yold,*Z,*Zold,*M;
253 246 PetscScalar szero=0.0,sone=1.0,smone=-1.0,spfive=0.5,sthree=3.0;
254 246 PetscReal sqrtnrm,tol,Yres=0.0,nrm,rwork[1],done=1.0;
255 246 PetscBLASInt info,i,it,N,one=1,zero=0;
256 246 PetscBool converged=PETSC_FALSE;
257 246 unsigned int ftz;
258
259
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
246 PetscFunctionBegin;
260 246 N = n*n;
261 246 tol = PetscSqrtReal((PetscReal)n)*PETSC_MACHINE_EPSILON/2;
262
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
246 PetscCall(SlepcSetFlushToZero(&ftz));
263
264
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
246 PetscCall(PetscMalloc4(N,&Yold,N,&Z,N,&Zold,N,&M));
265
266 /* scale */
267
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
246 PetscCall(PetscArraycpy(Z,A,N));
268
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
13560 for (i=0;i<n;i++) Z[i+i*ld] -= 1.0;
269 246 nrm = LAPACKlange_("fro",&n,&n,Z,&n,rwork);
270 246 sqrtnrm = PetscSqrtReal(nrm);
271
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
246 PetscCallBLAS("LAPACKlascl",LAPACKlascl_("G",&zero,&zero,&nrm,&done,&N,&one,A,&N,&info));
272
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
246 SlepcCheckLapackInfo("lascl",info);
273 246 tol *= nrm;
274
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
246 PetscCall(PetscInfo(fn,"||I-A||_F = %g, new tol: %g\n",(double)nrm,(double)tol));
275
3/6
✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
246 PetscCall(PetscLogFlops(2.0*n*n));
276
277 /* Z = I */
278
4/6
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
246 PetscCall(PetscArrayzero(Z,N));
279
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
13560 for (i=0;i<n;i++) Z[i+i*ld] = 1.0;
280
281
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
2878 for (it=0;it<NSMAXIT && !converged;it++) {
282 /* Yold = Y, Zold = Z */
283
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
2632 PetscCall(PetscArraycpy(Yold,Y,N));
284
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
2632 PetscCall(PetscArraycpy(Zold,Z,N));
285
286 /* M = (3*I-Zold*Yold) */
287
4/6
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
2632 PetscCall(PetscArrayzero(M,N));
288
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
170000 for (i=0;i<n;i++) M[i+i*ld] = sthree;
289
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
2632 PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&smone,Zold,&ld,Yold,&ld,&sone,M,&ld));
290
291 /* Y = (1/2)*Yold*M, Z = (1/2)*M*Zold */
292
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
2632 PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&spfive,Yold,&ld,M,&ld,&szero,Y,&ld));
293
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
2632 PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&spfive,M,&ld,Zold,&ld,&szero,Z,&ld));
294
295 /* reldiff = norm(Y-Yold,'fro')/norm(Y,'fro') */
296
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
2632 PetscCallBLAS("BLASaxpy",BLASaxpy_(&N,&smone,Y,&one,Yold,&one));
297 2632 Yres = LAPACKlange_("fro",&n,&n,Yold,&n,rwork);
298
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
2632 PetscCheck(!PetscIsNanReal(Yres),PETSC_COMM_SELF,PETSC_ERR_FP,"The computed norm is not-a-number");
299
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
2632 if (Yres<=tol) converged = PETSC_TRUE;
300
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
2632 PetscCall(PetscInfo(fn,"it: %" PetscBLASInt_FMT " res: %g\n",it,(double)Yres));
301
302
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
2632 PetscCall(PetscLogFlops(6.0*n*n*n+2.0*n*n));
303 }
304
305
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
246 PetscCheck(Yres<=tol,PETSC_COMM_SELF,PETSC_ERR_LIB,"SQRTM not converged after %d iterations",NSMAXIT);
306
307 /* undo scaling */
308
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
246 if (inv) {
309
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
120 PetscCall(PetscArraycpy(A,Z,N));
310
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
120 PetscCallBLAS("LAPACKlascl",LAPACKlascl_("G",&zero,&zero,&sqrtnrm,&done,&N,&one,A,&N,&info));
311
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
126 } else PetscCallBLAS("LAPACKlascl",LAPACKlascl_("G",&zero,&zero,&done,&sqrtnrm,&N,&one,A,&N,&info));
312
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
246 SlepcCheckLapackInfo("lascl",info);
313
314
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
246 PetscCall(PetscFree4(Yold,Z,Zold,M));
315
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
73 PetscCall(SlepcResetFlushToZero(&ftz));
316
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
48 PetscFunctionReturn(PETSC_SUCCESS);
317 }
318
319 #if defined(PETSC_HAVE_CUDA)
320 #include "../src/sys/classes/fn/impls/cuda/fnutilcuda.h"
321 #include <slepccupmblas.h>
322
323 /*
324 * Matrix square root by Newton-Schulz iteration. CUDA version.
325 * Computes the principal square root of the matrix A using the
326 * Newton-Schulz iteration. A is overwritten with sqrtm(A).
327 */
328 48 PetscErrorCode FNSqrtmNewtonSchulz_CUDA(FN fn,PetscBLASInt n,PetscScalar *d_A,PetscBLASInt ld,PetscBool inv)
329 {
330 48 PetscScalar *d_Yold,*d_Z,*d_Zold,*d_M,alpha;
331 48 PetscReal nrm,sqrtnrm,tol,Yres=0.0;
332 48 const PetscScalar szero=0.0,sone=1.0,smone=-1.0,spfive=0.5,sthree=3.0;
333 48 PetscInt it;
334 48 PetscBLASInt N;
335 48 const PetscBLASInt one=1;
336 48 PetscBool converged=PETSC_FALSE;
337 48 cublasHandle_t cublasv2handle;
338
339 48 PetscFunctionBegin;
340
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
48 PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA)); /* For CUDA event timers */
341
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
48 PetscCall(PetscCUBLASGetHandle(&cublasv2handle));
342 48 N = n*n;
343 48 tol = PetscSqrtReal((PetscReal)n)*PETSC_MACHINE_EPSILON/2;
344
345
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
48 PetscCallCUDA(cudaMalloc((void **)&d_Yold,sizeof(PetscScalar)*N));
346
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
48 PetscCallCUDA(cudaMalloc((void **)&d_Z,sizeof(PetscScalar)*N));
347
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
48 PetscCallCUDA(cudaMalloc((void **)&d_Zold,sizeof(PetscScalar)*N));
348
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
48 PetscCallCUDA(cudaMalloc((void **)&d_M,sizeof(PetscScalar)*N));
349
350
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
48 PetscCall(PetscLogGpuTimeBegin());
351
352 /* Z = I; */
353
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
48 PetscCallCUDA(cudaMemset(d_Z,0,sizeof(PetscScalar)*N));
354
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
48 PetscCall(set_diagonal(n,d_Z,ld,sone));
355
356 /* scale */
357
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
48 PetscCallCUBLAS(cublasXaxpy(cublasv2handle,N,&smone,d_A,one,d_Z,one));
358
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
48 PetscCallCUBLAS(cublasXnrm2(cublasv2handle,N,d_Z,one,&nrm));
359 48 sqrtnrm = PetscSqrtReal(nrm);
360 48 alpha = 1.0/nrm;
361
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
48 PetscCallCUBLAS(cublasXscal(cublasv2handle,N,&alpha,d_A,one));
362 48 tol *= nrm;
363
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
48 PetscCall(PetscInfo(fn,"||I-A||_F = %g, new tol: %g\n",(double)nrm,(double)tol));
364
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
48 PetscCall(PetscLogGpuFlops(2.0*n*n));
365
366 /* Z = I; */
367
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
48 PetscCallCUDA(cudaMemset(d_Z,0,sizeof(PetscScalar)*N));
368
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
48 PetscCall(set_diagonal(n,d_Z,ld,sone));
369
370
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
560 for (it=0;it<NSMAXIT && !converged;it++) {
371 /* Yold = Y, Zold = Z */
372
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
512 PetscCallCUDA(cudaMemcpy(d_Yold,d_A,sizeof(PetscScalar)*N,cudaMemcpyDeviceToDevice));
373
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
512 PetscCallCUDA(cudaMemcpy(d_Zold,d_Z,sizeof(PetscScalar)*N,cudaMemcpyDeviceToDevice));
374
375 /* M = (3*I - Zold*Yold) */
376
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
512 PetscCallCUDA(cudaMemset(d_M,0,sizeof(PetscScalar)*N));
377
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
512 PetscCall(set_diagonal(n,d_M,ld,sthree));
378
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
512 PetscCallCUBLAS(cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n,n,n,&smone,d_Zold,ld,d_Yold,ld,&sone,d_M,ld));
379
380 /* Y = (1/2) * Yold * M, Z = (1/2) * M * Zold */
381
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
512 PetscCallCUBLAS(cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n,n,n,&spfive,d_Yold,ld,d_M,ld,&szero,d_A,ld));
382
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
512 PetscCallCUBLAS(cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n,n,n,&spfive,d_M,ld,d_Zold,ld,&szero,d_Z,ld));
383
384 /* reldiff = norm(Y-Yold,'fro')/norm(Y,'fro') */
385
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
512 PetscCallCUBLAS(cublasXaxpy(cublasv2handle,N,&smone,d_A,one,d_Yold,one));
386
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
512 PetscCallCUBLAS(cublasXnrm2(cublasv2handle,N,d_Yold,one,&Yres));
387
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
512 PetscCheck(!PetscIsNanReal(Yres),PETSC_COMM_SELF,PETSC_ERR_FP,"The computed norm is not-a-number");
388
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
512 if (Yres<=tol) converged = PETSC_TRUE;
389
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
512 PetscCall(PetscInfo(fn,"it: %" PetscInt_FMT " res: %g\n",it,(double)Yres));
390
391
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
512 PetscCall(PetscLogGpuFlops(6.0*n*n*n+2.0*n*n));
392 }
393
394
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
48 PetscCheck(Yres<=tol,PETSC_COMM_SELF,PETSC_ERR_LIB,"SQRTM not converged after %d iterations", NSMAXIT);
395
396 /* undo scaling */
397
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
48 if (inv) {
398 24 alpha = 1.0/sqrtnrm;
399
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
24 PetscCallCUBLAS(cublasXscal(cublasv2handle,N,&alpha,d_Z,one));
400
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
24 PetscCallCUDA(cudaMemcpy(d_A,d_Z,sizeof(PetscScalar)*N,cudaMemcpyDeviceToDevice));
401 } else {
402 24 alpha = sqrtnrm;
403
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
24 PetscCallCUBLAS(cublasXscal(cublasv2handle,N,&alpha,d_A,one));
404 }
405
406
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
48 PetscCall(PetscLogGpuTimeEnd());
407
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
48 PetscCallCUDA(cudaFree(d_Yold));
408
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
48 PetscCallCUDA(cudaFree(d_Z));
409
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
48 PetscCallCUDA(cudaFree(d_Zold));
410
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
48 PetscCallCUDA(cudaFree(d_M));
411 PetscFunctionReturn(PETSC_SUCCESS);
412 }
413
414 #if defined(PETSC_HAVE_MAGMA)
415 #include <slepcmagma.h>
416
417 /*
418 * Matrix square root by product form of Denman-Beavers iteration. CUDA version.
419 * Computes the principal square root of the matrix T using the product form
420 * of the Denman-Beavers iteration. T is overwritten with sqrtm(T).
421 */
422 48 PetscErrorCode FNSqrtmDenmanBeavers_CUDAm(FN fn,PetscBLASInt n,PetscScalar *d_T,PetscBLASInt ld,PetscBool inv)
423 {
424 48 PetscScalar *d_Told,*d_M,*d_invM,*d_work,prod,szero=0.0,sone=1.0,smone=-1.0,spfive=0.5,sneg_pfive=-0.5,sp25=0.25,alpha;
425 48 PetscReal tol,Mres=0.0,detM,g,reldiff,fnormdiff,fnormT;
426 48 PetscInt it,lwork,nb;
427 48 PetscBLASInt N,one=1,*piv=NULL;
428 48 PetscBool converged=PETSC_FALSE,scale;
429 48 cublasHandle_t cublasv2handle;
430
431 48 PetscFunctionBegin;
432
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
48 PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA)); /* For CUDA event timers */
433
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
48 PetscCall(PetscCUBLASGetHandle(&cublasv2handle));
434
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
48 PetscCall(SlepcMagmaInit());
435 48 N = n*n;
436 48 scale = PetscDefined(USE_REAL_SINGLE)? PETSC_FALSE: PETSC_TRUE;
437 48 tol = PetscSqrtReal((PetscReal)n)*PETSC_MACHINE_EPSILON/2;
438
439 /* query work size */
440 48 nb = magma_get_xgetri_nb(n);
441 48 lwork = nb*n;
442
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
48 PetscCall(PetscMalloc1(n,&piv));
443
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
48 PetscCallCUDA(cudaMalloc((void **)&d_work,sizeof(PetscScalar)*lwork));
444
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
48 PetscCallCUDA(cudaMalloc((void **)&d_Told,sizeof(PetscScalar)*N));
445
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
48 PetscCallCUDA(cudaMalloc((void **)&d_M,sizeof(PetscScalar)*N));
446
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
48 PetscCallCUDA(cudaMalloc((void **)&d_invM,sizeof(PetscScalar)*N));
447
448
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
48 PetscCall(PetscLogGpuTimeBegin());
449
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
48 PetscCallCUDA(cudaMemcpy(d_M,d_T,sizeof(PetscScalar)*N,cudaMemcpyDeviceToDevice));
450
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
48 if (inv) { /* start recurrence with I instead of A */
451
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
24 PetscCallCUDA(cudaMemset(d_T,0,sizeof(PetscScalar)*N));
452
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
24 PetscCall(set_diagonal(n,d_T,ld,1.0));
453 }
454
455
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
304 for (it=0;it<DBMAXIT && !converged;it++) {
456
457
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
256 if (scale) { /* g = (abs(det(M)))^(-1/(2*n)); */
458
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
160 PetscCallCUDA(cudaMemcpy(d_invM,d_M,sizeof(PetscScalar)*N,cudaMemcpyDeviceToDevice));
459
2/6
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
160 PetscCallMAGMA(magma_xgetrf_gpu,n,n,d_invM,ld,piv);
460
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
160 PetscCall(mult_diagonal(n,d_invM,ld,&prod));
461 160 detM = PetscAbsScalar(prod);
462
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
160 g = (detM>PETSC_MAX_REAL)? 0.5: PetscPowReal(detM,-1.0/(2.0*n));
463 160 alpha = g;
464
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
160 PetscCallCUBLAS(cublasXscal(cublasv2handle,N,&alpha,d_T,one));
465 160 alpha = g*g;
466
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
160 PetscCallCUBLAS(cublasXscal(cublasv2handle,N,&alpha,d_M,one));
467
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
160 PetscCall(PetscLogGpuFlops(2.0*n*n*n/3.0+2.0*n*n));
468 }
469
470
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
256 PetscCallCUDA(cudaMemcpy(d_Told,d_T,sizeof(PetscScalar)*N,cudaMemcpyDeviceToDevice));
471
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
256 PetscCallCUDA(cudaMemcpy(d_invM,d_M,sizeof(PetscScalar)*N,cudaMemcpyDeviceToDevice));
472
473
2/6
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
256 PetscCallMAGMA(magma_xgetrf_gpu,n,n,d_invM,ld,piv);
474
2/6
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
256 PetscCallMAGMA(magma_xgetri_gpu,n,d_invM,ld,piv,d_work,lwork);
475
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
256 PetscCall(PetscLogGpuFlops(2.0*n*n*n/3.0+4.0*n*n*n/3.0));
476
477
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
256 PetscCall(shift_diagonal(n,d_invM,ld,sone));
478
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
256 PetscCallCUBLAS(cublasXgemm(cublasv2handle,CUBLAS_OP_N,CUBLAS_OP_N,n,n,n,&spfive,d_Told,ld,d_invM,ld,&szero,d_T,ld));
479
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
256 PetscCall(shift_diagonal(n,d_invM,ld,smone));
480
481
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
256 PetscCallCUBLAS(cublasXaxpy(cublasv2handle,N,&sone,d_invM,one,d_M,one));
482
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
256 PetscCallCUBLAS(cublasXscal(cublasv2handle,N,&sp25,d_M,one));
483
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
256 PetscCall(shift_diagonal(n,d_M,ld,sneg_pfive));
484
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
256 PetscCall(PetscLogGpuFlops(2.0*n*n*n+2.0*n*n));
485
486
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
256 PetscCallCUBLAS(cublasXnrm2(cublasv2handle,N,d_M,one,&Mres));
487
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
256 PetscCall(shift_diagonal(n,d_M,ld,sone));
488
489
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
256 if (scale) {
490 /* reldiff = norm(T - Told,'fro')/norm(T,'fro'); */
491
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
160 PetscCallCUBLAS(cublasXaxpy(cublasv2handle,N,&smone,d_T,one,d_Told,one));
492
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
160 PetscCallCUBLAS(cublasXnrm2(cublasv2handle,N,d_Told,one,&fnormdiff));
493
1/10
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✗ Branch 8 not taken.
✗ Branch 9 not taken.
160 PetscCallCUBLAS(cublasXnrm2(cublasv2handle,N,d_T,one,&fnormT));
494
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
160 PetscCall(PetscLogGpuFlops(7.0*n*n));
495 160 reldiff = fnormdiff/fnormT;
496
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
160 PetscCall(PetscInfo(fn,"it: %" PetscInt_FMT " reldiff: %g scale: %g tol*scale: %g\n",it,(double)reldiff,(double)g,(double)tol*g));
497
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
160 if (reldiff<1e-2) scale = PETSC_FALSE; /* Switch to no scaling. */
498 }
499
500
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
256 PetscCall(PetscInfo(fn,"it: %" PetscInt_FMT " Mres: %g\n",it,(double)Mres));
501
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
256 if (Mres<=tol) converged = PETSC_TRUE;
502 }
503
504
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
48 PetscCheck(Mres<=tol,PETSC_COMM_SELF,PETSC_ERR_LIB,"SQRTM not converged after %d iterations", DBMAXIT);
505
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
48 PetscCall(PetscLogGpuTimeEnd());
506
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
48 PetscCall(PetscFree(piv));
507
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
48 PetscCallCUDA(cudaFree(d_work));
508
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
48 PetscCallCUDA(cudaFree(d_Told));
509
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
48 PetscCallCUDA(cudaFree(d_M));
510
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
48 PetscCallCUDA(cudaFree(d_invM));
511 PetscFunctionReturn(PETSC_SUCCESS);
512 }
513 #endif /* PETSC_HAVE_MAGMA */
514
515 #endif /* PETSC_HAVE_CUDA */
516
517 #define ITMAX 5
518
519 /*
520 Estimate norm(A^m,1) by block 1-norm power method (required workspace is 11*n)
521 */
522 280 static PetscErrorCode SlepcNormEst1(PetscBLASInt n,PetscScalar *A,PetscInt m,PetscScalar *work,PetscRandom rand,PetscReal *nrm)
523 {
524 280 PetscScalar *X,*Y,*Z,*S,*S_old,*aux,val,sone=1.0,szero=0.0;
525 280 PetscReal est=0.0,est_old,vals[2]={0.0,0.0},*zvals,maxzval[2],raux;
526 280 PetscBLASInt i,j,t=2,it=0,ind[2],est_j=0,m1;
527
528
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
280 PetscFunctionBegin;
529 280 X = work;
530 280 Y = work + 2*n;
531 280 Z = work + 4*n;
532 280 S = work + 6*n;
533 280 S_old = work + 8*n;
534 280 zvals = (PetscReal*)(work + 10*n);
535
536
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
33980 for (i=0;i<n;i++) { /* X has columns of unit 1-norm */
537 33700 X[i] = 1.0/n;
538
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
33700 PetscCall(PetscRandomGetValue(rand,&val));
539
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
33700 if (PetscRealPart(val) < 0.5) X[i+n] = -1.0/n;
540 16595 else X[i+n] = 1.0/n;
541 }
542
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
67680 for (i=0;i<t*n;i++) S[i] = 0.0;
543 280 ind[0] = 0; ind[1] = 0;
544 280 est_old = 0;
545 609 while (1) {
546 609 it++;
547
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
2667 for (j=0;j<m;j++) { /* Y = A^m*X */
548
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
2058 PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&t,&n,&sone,A,&n,X,&n,&szero,Y,&n));
549
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
2058 if (j<m-1) SlepcSwap(X,Y,aux);
550 }
551
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
1827 for (j=0;j<t;j++) { /* vals[j] = norm(Y(:,j),1) */
552 1218 vals[j] = 0.0;
553
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
147812 for (i=0;i<n;i++) vals[j] += PetscAbsScalar(Y[i+j*n]);
554 }
555
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
609 if (vals[0]<vals[1]) {
556 340 SlepcSwap(vals[0],vals[1],raux);
557 340 m1 = 1;
558 } else m1 = 0;
559 609 est = vals[0];
560
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
609 if (est>est_old || it==2) est_j = ind[m1];
561
3/4
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 10 times.
✗ Branch 3 not taken.
609 if (it>=2 && est<=est_old) {
562 est = est_old;
563 break;
564 }
565 120516 est_old = est;
566
1/2
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
385 if (it>ITMAX) break;
567 147203 SlepcSwap(S,S_old,aux);
568
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
147203 for (i=0;i<t*n;i++) { /* S = sign(Y) */
569
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
255459 S[i] = (PetscRealPart(Y[i]) < 0.0)? -1.0: 1.0;
570 }
571
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
2667 for (j=0;j<m;j++) { /* Z = (A^T)^m*S */
572
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
2058 PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&n,&t,&n,&sone,A,&n,S,&n,&szero,Z,&n));
573
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
2058 if (j<m-1) SlepcSwap(S,Z,aux);
574 }
575 609 maxzval[0] = -1; maxzval[1] = -1;
576 609 ind[0] = 0; ind[1] = 0;
577
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
73906 for (i=0;i<n;i++) { /* zvals[i] = norm(Z(i,:),inf) */
578
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
73297 zvals[i] = PetscMax(PetscAbsScalar(Z[i+0*n]),PetscAbsScalar(Z[i+1*n]));
579
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
73297 if (zvals[i]>maxzval[0]) {
580 7221 maxzval[0] = zvals[i];
581 7221 ind[0] = i;
582
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
66076 } else if (zvals[i]>maxzval[1]) {
583 3348 maxzval[1] = zvals[i];
584 3348 ind[1] = i;
585 }
586 }
587
4/4
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 5 times.
✓ Branch 3 taken 10 times.
609 if (it>=2 && maxzval[0]==zvals[est_j]) break;
588
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
79523 for (i=0;i<t*n;i++) X[i] = 0.0;
589
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
987 for (j=0;j<t;j++) X[ind[j]+j*n] = 1.0;
590 }
591 280 *nrm = est;
592 /* Flop count is roughly (it * 2*m * t*gemv) = 4*its*m*t*n*n */
593
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
280 PetscCall(PetscLogFlops(4.0*it*m*t*n*n));
594
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
280 PetscFunctionReturn(PETSC_SUCCESS);
595 }
596
597 #define SMALLN 100
598
599 /*
600 Estimate norm(A^m,1) (required workspace is 2*n*n)
601 */
602 5240 PetscErrorCode SlepcNormAm(PetscBLASInt n,PetscScalar *A,PetscInt m,PetscScalar *work,PetscRandom rand,PetscReal *nrm)
603 {
604 5240 PetscScalar *v=work,*w=work+n*n,*aux,sone=1.0,szero=0.0;
605 5240 PetscReal rwork[1],tmp;
606 5240 PetscBLASInt i,j,one=1;
607 5240 PetscBool isrealpos=PETSC_TRUE;
608
609
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
5240 PetscFunctionBegin;
610
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
5240 if (n<SMALLN) { /* compute matrix power explicitly */
611
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
4780 if (m==1) {
612 *nrm = LAPACKlange_("O",&n,&n,A,&n,rwork);
613 PetscCall(PetscLogFlops(1.0*n*n));
614 } else { /* m>=2 */
615
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
4780 PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&sone,A,&n,A,&n,&szero,v,&n));
616
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
75190 for (j=0;j<m-2;j++) {
617
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
70410 PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&sone,A,&n,v,&n,&szero,w,&n));
618 70410 SlepcSwap(v,w,aux);
619 }
620 4780 *nrm = LAPACKlange_("O",&n,&n,v,&n,rwork);
621
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
4780 PetscCall(PetscLogFlops(2.0*n*n*n*(m-1)+1.0*n*n));
622 }
623 } else {
624
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
55010 for (i=0;i<n;i++)
625
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
4421260 for (j=0;j<n;j++)
626 #if defined(PETSC_USE_COMPLEX)
627
3/4
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 5 times.
✗ Branch 3 not taken.
2200145 if (PetscRealPart(A[i+j*n])<0.0 || PetscImaginaryPart(A[i+j*n])!=0.0) { isrealpos = PETSC_FALSE; break; }
628 #else
629
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
2200145 if (A[i+j*n]<0.0) { isrealpos = PETSC_FALSE; break; }
630 #endif
631
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
460 if (isrealpos) { /* for positive matrices only */
632
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
21030 for (i=0;i<n;i++) v[i] = 1.0;
633
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
4380 for (j=0;j<m;j++) { /* w = A'*v */
634
10/20
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✓ Branch 7 taken 2 times.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
✓ Branch 12 taken 2 times.
✗ Branch 13 not taken.
✓ Branch 14 taken 2 times.
✗ Branch 15 not taken.
✗ Branch 16 not taken.
✓ Branch 17 taken 2 times.
✗ Branch 18 not taken.
✗ Branch 19 not taken.
4200 PetscCallBLAS("BLASgemv",BLASgemv_("C",&n,&n,&sone,A,&n,v,&one,&szero,w,&one));
635 4200 SlepcSwap(v,w,aux);
636 }
637
3/6
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
180 PetscCall(PetscLogFlops(2.0*n*n*m));
638 180 *nrm = 0.0;
639
4/4
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 10 times.
✓ Branch 3 taken 10 times.
21030 for (i=0;i<n;i++) if ((tmp = PetscAbsScalar(v[i])) > *nrm) *nrm = tmp; /* norm(v,inf) */
640
4/6
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 2 times.
280 } else PetscCall(SlepcNormEst1(n,A,m,work,rand,nrm));
641 }
642
6/12
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 2 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 2 times.
✗ Branch 9 not taken.
✗ Branch 10 not taken.
✓ Branch 11 taken 2 times.
1032 PetscFunctionReturn(PETSC_SUCCESS);
643 }
644