Line data Source code
1 : /*
2 : - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3 : SLEPc - Scalable Library for Eigenvalue Problem Computations
4 : Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
5 :
6 : This file is part of SLEPc.
7 : SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8 : - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9 : */
10 : /*
11 : BDC - Block-divide and conquer (see description in README file)
12 : */
13 :
14 : #include <slepc/private/dsimpl.h>
15 : #include <slepcblaslapack.h>
16 :
17 21 : PetscErrorCode BDC_dmerg2_(const char *jobz,PetscBLASInt rkct,PetscBLASInt n,
18 : PetscReal *ev,PetscReal *q,PetscBLASInt ldq,PetscBLASInt *indxq,
19 : PetscReal *rho,PetscReal *u,PetscBLASInt sbrkp1,PetscReal *v,
20 : PetscBLASInt sbrk,PetscBLASInt cutpnt,PetscReal *work,PetscBLASInt lwork,
21 : PetscBLASInt *iwork,PetscReal tol,PetscBLASInt *info,PetscBLASInt jobz_len)
22 : {
23 : /* -- Routine written in LAPACK Version 3.0 style -- */
24 : /* *************************************************** */
25 : /* Written by */
26 : /* Michael Moldaschl and Wilfried Gansterer */
27 : /* University of Vienna */
28 : /* last modification: March 16, 2014 */
29 :
30 : /* Small adaptations of original code written by */
31 : /* Wilfried Gansterer and Bob Ward, */
32 : /* Department of Computer Science, University of Tennessee */
33 : /* see https://doi.org/10.1137/S1064827501399432 */
34 : /* *************************************************** */
35 :
36 : /* Purpose */
37 : /* ======= */
38 :
39 : /* DMERG2 computes the updated eigensystem of a diagonal matrix after */
40 : /* modification by a rank-one symmetric matrix. The diagonal matrix */
41 : /* consists of two diagonal submatrices, and the vectors defining the */
42 : /* rank-1 matrix similarly have two underlying subvectors each. */
43 : /* The dimension of the first subproblem is CUTPNT, the dimension of */
44 : /* the second subproblem is N-CUTPNT. */
45 :
46 : /* T = Q(in) (EV(in) + RHO * Z*Z') Q'(in) = Q(out) * EV(out) * Q'(out) */
47 :
48 : /* where Z = Q'[V U']', where V is a row vector and U is a column */
49 : /* vector with dimensions corresponding to the two underlying */
50 : /* subproblems. */
51 :
52 : /* The eigenvectors of the original matrix are stored in Q, and the */
53 : /* eigenvalues in EV. The algorithm consists of three stages: */
54 :
55 : /* The first stage consists of deflating the size of the problem */
56 : /* when there are multiple eigenvalues or if there is a zero in */
57 : /* the Z vector. For each such occurrence the dimension of the */
58 : /* secular equation problem is reduced by one. This stage is */
59 : /* performed by the routine DSRTDF. */
60 :
61 : /* The second stage consists of calculating the updated */
62 : /* eigenvalues. This is done by finding the roots of the secular */
63 : /* equation via the routine DLAED4 (as called by DLAED3M). */
64 : /* This routine also calculates the eigenvectors of the current */
65 : /* problem. */
66 :
67 : /* If(JOBZ.EQ.'D') then the final stage consists */
68 : /* of computing the updated eigenvectors directly using the updated */
69 : /* eigenvalues. The eigenvectors for the current problem are multiplied */
70 : /* with the eigenvectors from the overall problem. */
71 :
72 : /* Arguments */
73 : /* ========= */
74 :
75 : /* JOBZ (input) CHARACTER*1 */
76 : /* = 'N': Compute eigenvalues only (not implemented); */
77 : /* = 'D': Compute eigenvalues and eigenvectors. */
78 : /* Eigenvectors are accumulated in the divide-and-conquer */
79 : /* process. */
80 :
81 : /* RKCT (input) INTEGER */
82 : /* The number of the rank modification which is accounted for */
83 : /* (RKCT >= 1). Required parameter, because the update operation of the */
84 : /* modification vector can be performed much more efficiently */
85 : /* if RKCT.EQ.1. In that case, the eigenvector matrix is still */
86 : /* block-diagonal. For RKCT.GE.2 the eigenvector matrix for the update */
87 : /* operation has filled up and is a full matrix. */
88 :
89 : /* N (input) INTEGER */
90 : /* The dimension of the symmetric block tridiagonal matrix. */
91 : /* N >= 0. */
92 :
93 : /* EV (input/output) DOUBLE PRECISION array, dimension (N) */
94 : /* On entry, the eigenvalues (=diagonal values) of the */
95 : /* rank-1-perturbed matrix. */
96 : /* On exit, the eigenvalues of the repaired matrix. */
97 :
98 : /* Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) */
99 : /* On entry, the eigenvectors of the rank-1-perturbed matrix. */
100 : /* On exit, the eigenvectors of the repaired tridiagonal matrix. */
101 :
102 : /* LDQ (input) INTEGER */
103 : /* The leading dimension of the array Q. LDQ >= max(1,N). */
104 :
105 : /* INDXQ (input/output) INTEGER array, dimension (N) */
106 : /* On entry, the permutation which separately sorts the two */
107 : /* subproblems in EV into ascending order. */
108 : /* On exit, the permutation which will reintegrate the */
109 : /* subproblems back into sorted order, */
110 : /* i.e. EV(INDXQ(I = 1, N)) will be in ascending order. */
111 :
112 : /* RHO (input/output) DOUBLE PRECISION */
113 : /* The scalar in the rank-1 perturbation. Modified (multiplied */
114 : /* by 2) in DSRTDF. */
115 :
116 : /* U (input) DOUBLE PRECISION array; dimension (SBRKP1), where SBRKP1 */
117 : /* is the size of the first (original) block after CUTPNT. */
118 : /* The column vector of the rank-1 subdiagonal connecting the */
119 : /* two diagonal subproblems. */
120 : /* Theoretically, zero entries might have to be appended after U */
121 : /* in order to make it have dimension (N-CUTPNT). However, this */
122 : /* is not required because it can be accounted for in the */
123 : /* matrix-vector product using the argument SBRKP1. */
124 :
125 : /* SBRKP1 (input) INTEGER */
126 : /* Dimension of the relevant (non-zero) part of the vector U. */
127 : /* Equal to the size of the first original block after the */
128 : /* breakpoint. */
129 :
130 : /* V (input) DOUBLE PRECISION array; dimension (SBRK), where SBRK */
131 : /* is the size of the last (original) block before CUTPNT. */
132 : /* The row vector of the rank-1 subdiagonal connecting the two */
133 : /* diagonal subproblems. */
134 : /* Theoretically, zero entries might have to be inserted in front */
135 : /* of V in order to make it have dimension (CUTPNT). However, this */
136 : /* is not required because it can be accounted for in the */
137 : /* matrix-vector product using the argument SBRK. */
138 :
139 : /* SBRK (input) INTEGER */
140 : /* Dimension of the relevant (non-zero) part of the vector V. */
141 : /* Equal to the size of the last original block before the */
142 : /* breakpoint. */
143 :
144 : /* CUTPNT (input) INTEGER */
145 : /* The location of the last eigenvalue of the leading diagonal */
146 : /* block. min(1,N) <= CUTPNT <= max(1,N). */
147 :
148 : /* WORK (workspace) DOUBLE PRECISION array, dimension (LWORK) */
149 :
150 : /* LWORK (input) INTEGER */
151 : /* The dimension of the array WORK. */
152 : /* In order to guarantee correct results in all cases, */
153 : /* LWORK must be at least (2*N**2 + 3*N). In many cases, */
154 : /* less workspace is required. The absolute minimum required is */
155 : /* (N**2 + 3*N). */
156 : /* If the workspace provided is not sufficient, the routine will */
157 : /* return a corresponding error code and report how much workspace */
158 : /* was missing (see INFO). */
159 : /* NOTE: This parameter is needed for determining whether enough */
160 : /* workspace is provided, and, if not, for computing how */
161 : /* much workspace is needed. */
162 :
163 : /* IWORK (workspace) INTEGER array, dimension (4*N) */
164 :
165 : /* TOL (input) DOUBLE PRECISION */
166 : /* User specified deflation tolerance for the routine DSRTDF. */
167 :
168 : /* INFO (output) INTEGER */
169 : /* = 0: successful exit. */
170 : /* < -200: not enough workspace */
171 : /* ABS(INFO + 200) numbers have to be stored in addition */
172 : /* to the workspace provided, otherwise some eigenvectors */
173 : /* will be incorrect. */
174 : /* < 0, > -99: if INFO.EQ.-i, the i-th argument had an */
175 : /* illegal value. */
176 : /* > 0: if INFO.EQ.1, an eigenvalue did not converge */
177 : /* if INFO.EQ.2, the deflation counters DZ and DE do not sum */
178 : /* up to the total number N-K of components */
179 : /* deflated */
180 :
181 : /* Further Details */
182 : /* =============== */
183 :
184 : /* Based on code written by */
185 : /* Wilfried Gansterer and Bob Ward, */
186 : /* Department of Computer Science, University of Tennessee */
187 :
188 : /* Based on the design of the LAPACK code Dlaed1.f written by Jeff */
189 : /* Rutter, Computer Science Division, University of California at */
190 : /* Berkeley, and modified by Francoise Tisseur, University of Tennessee. */
191 :
192 : /* ===================================================================== */
193 :
194 21 : PetscBLASInt i, k, n1, n2, de, is, dz, iw, iz, iq2, nmc, cpp1;
195 21 : PetscBLASInt indx, indxc, indxp, lwmin, idlmda;
196 21 : PetscBLASInt spneed, coltyp, tmpcut, i__1, i__2, one=1, mone=-1;
197 21 : char defl[1];
198 21 : PetscReal done = 1.0, dzero = 0.0;
199 :
200 21 : PetscFunctionBegin;
201 21 : *info = 0;
202 21 : lwmin = n*n + n * 3;
203 :
204 21 : if (n < 0) *info = -3;
205 21 : else if (ldq < PetscMax(1,n)) *info = -6;
206 21 : else if (cutpnt < PetscMin(1,n) || cutpnt > PetscMax(1,n)) *info = -13;
207 21 : else if (lwork < lwmin) *info = -15;
208 21 : PetscCheck(!*info,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong argument %" PetscBLASInt_FMT " in DMERG2",-(*info));
209 :
210 : /* **************************************************************************** */
211 :
212 : /* Quick return if possible */
213 :
214 21 : if (n == 0) PetscFunctionReturn(PETSC_SUCCESS);
215 :
216 : /* **************************************************************************** */
217 :
218 : /* The following values are integer pointers which indicate */
219 : /* the portion of the workspace used by a particular array in DSRTDF */
220 : /* and DLAED3M. */
221 :
222 21 : iz = 0;
223 21 : idlmda = iz + n;
224 21 : iw = idlmda + n;
225 21 : iq2 = iw + n;
226 21 : is = iq2 + n * n;
227 :
228 : /* After the pointer IS the matrix S is stored and read in WORK */
229 : /* in the routine DLAED3M. */
230 :
231 21 : indx = 0;
232 21 : indxc = indx + n;
233 21 : coltyp = indxc + n;
234 21 : indxp = coltyp + n;
235 :
236 : /* If eigenvectors are to be accumulated in the divide-and-conquer */
237 : /* process (JOBZ.EQ.'D') form the z-vector which consists of */
238 : /* Q_1^T * V and Q_2^T * U. */
239 :
240 21 : cpp1 = cutpnt + 1;
241 21 : if (rkct == 1) {
242 :
243 : /* for the first rank modification the eigenvector matrix has */
244 : /* special block-diagonal structure and therefore Q_1^T * V and */
245 : /* Q_2^T * U can be formed separately. */
246 :
247 7 : PetscCallBLAS("BLASgemv",BLASgemv_("T", &sbrk, &cutpnt, &done,
248 : &q[cutpnt - sbrk], &ldq, v, &one, &dzero, &work[iz], &one));
249 7 : nmc = n - cutpnt;
250 7 : PetscCallBLAS("BLASgemv",BLASgemv_("T", &sbrkp1, &nmc, &done,
251 : &q[cpp1-1 + (cpp1-1)*ldq], &ldq, u,
252 : &one, &dzero, &work[iz + cutpnt], &one));
253 :
254 : } else {
255 :
256 : /* for the higher rank modifications, the vectors V and U */
257 : /* have to be multiplied with the full eigenvector matrix */
258 :
259 14 : PetscCallBLAS("BLASgemv",BLASgemv_("T", &sbrk, &n, &done,
260 : &q[cutpnt - sbrk], &ldq, v, &one, &dzero, &work[iz], &one));
261 14 : PetscCallBLAS("BLASgemv",BLASgemv_("T", &sbrkp1, &n, &done, &q[cpp1-1],
262 : &ldq, u, &one, &done, &work[iz], &one));
263 :
264 : }
265 :
266 : /* **************************************************************************** */
267 :
268 : /* Deflate eigenvalues. */
269 :
270 21 : if (rkct == 1) {
271 :
272 : /* for the first rank modification we need the actual cutpoint */
273 :
274 7 : n1 = cutpnt;
275 7 : tmpcut = cutpnt;
276 : } else {
277 :
278 : /* for the later rank modifications there is no actual cutpoint any more */
279 :
280 14 : n1 = n;
281 :
282 : /* The original value of CUTPNT has to be preserved for the next time */
283 : /* this subroutine is called (therefore, CUTPNT is an INPUT parameter */
284 : /* and not to be changed). Thus, assign N to TMPCUT and use the local */
285 : /* variable TMPCUT from now on for the cut point. */
286 :
287 14 : tmpcut = n;
288 : }
289 :
290 : /* initialize the flag DEFL (indicates whether deflation occurred - */
291 : /* this information is needed later in DLAED3M) */
292 :
293 21 : *(unsigned char *)defl = '0';
294 :
295 : /* call DSRTDF for deflation */
296 :
297 21 : PetscCall(BDC_dsrtdf_(&k, n, n1, ev, q, ldq, indxq, rho, &work[iz],
298 : &work[idlmda], &work[iw], &work[iq2], &iwork[indx],
299 : &iwork[indxc], &iwork[indxp], &iwork[coltyp], tol, &dz, &de, info));
300 21 : PetscCheck(!*info,PETSC_COMM_SELF,PETSC_ERR_LIB,"dmerg2: error in dsrtdf, info = %" PetscBLASInt_FMT,*info);
301 :
302 21 : if (k < n) {
303 :
304 : /* ....some deflation occurred in dsrtdf, set the flag DEFL */
305 : /* (needed in DLAED3M.f, since Givens rotations need to be */
306 : /* applied to the eigenvector matrix only if some deflation */
307 : /* happened) */
308 :
309 9 : *(unsigned char *)defl = '1';
310 : }
311 :
312 : /* **************************************************************************** */
313 :
314 : /* Solve the Secular Equation. */
315 :
316 21 : if (k != 0) {
317 :
318 : /* ....not everything was deflated. */
319 :
320 : /* ....check whether enough workspace is available: */
321 :
322 : /* Note that the following (upper) bound SPNEED for the workspace */
323 : /* requirements should also hold in the extreme case TMPCUT=N, */
324 : /* which happens for every rank modification after the first one. */
325 :
326 21 : i__1 = (iwork[coltyp] + iwork[coltyp+1]) * k;
327 21 : i__2 = (iwork[coltyp+1] + iwork[coltyp + 2]) * k;
328 21 : spneed = is + PetscMax(i__1,i__2) - 1;
329 :
330 21 : PetscCheck(spneed<=lwork,PETSC_COMM_SELF,PETSC_ERR_MEM,"dmerg2: Workspace needed exceeds the workspace provided by %" PetscBLASInt_FMT " numbers",spneed-lwork);
331 :
332 : /* calling DLAED3M for solving the secular equation. */
333 :
334 21 : PetscCall(BDC_dlaed3m_(jobz, defl, k, n, tmpcut, ev, q, ldq,
335 : *rho, &work[idlmda], &work[iq2], &iwork[indxc], &iwork[coltyp],
336 : &work[iw], &work[is], info, 1, 1));
337 21 : PetscCheck(!*info,PETSC_COMM_SELF,PETSC_ERR_LIB,"dmerg2: error in dlaed3m, info = %" PetscBLASInt_FMT,*info);
338 :
339 : /* Prepare the INDXQ sorting permutation. */
340 :
341 21 : n1 = k;
342 21 : n2 = n - k;
343 21 : PetscCallBLAS("LAPACKlamrg",LAPACKlamrg_(&n1, &n2, ev, &one, &mone, indxq));
344 21 : if (k == 0) for (i = 0; i < n; ++i) indxq[i] = i+1;
345 :
346 : } else {
347 :
348 : /* ....everything was deflated (K.EQ.0) */
349 :
350 0 : for (i = 0; i < n; ++i) indxq[i] = i+1;
351 : }
352 21 : PetscFunctionReturn(PETSC_SUCCESS);
353 : }
|