Skip to content

Commit 6ec7f2b

Browse files
authored
Merge pull request #1080 from jprhyne/master
Adding a recursive xLARFT
2 parents 7b4c3a3 + e9b05ef commit 6ec7f2b

File tree

10 files changed

+3105
-578
lines changed

10 files changed

+3105
-578
lines changed

SRC/VARIANTS/Makefile

+8-2
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,11 @@ LUREC = lu/REC/cgetrf.o lu/REC/dgetrf.o lu/REC/sgetrf.o lu/REC/zgetrf.o
3030

3131
QRLL = qr/LL/cgeqrf.o qr/LL/dgeqrf.o qr/LL/sgeqrf.o qr/LL/zgeqrf.o
3232

33+
LARFTL2 = larft/LL-LVL2/clarft.o larft/LL-LVL2/dlarft.o larft/LL-LVL2/slarft.o larft/LL-LVL2/zlarft.o
34+
3335

3436
.PHONY: all
35-
all: cholrl.a choltop.a lucr.a lull.a lurec.a qrll.a
37+
all: cholrl.a choltop.a lucr.a lull.a lurec.a qrll.a larftl2.a
3638

3739
cholrl.a: $(CHOLRL)
3840
$(AR) $(ARFLAGS) $@ $^
@@ -58,9 +60,13 @@ qrll.a: $(QRLL)
5860
$(AR) $(ARFLAGS) $@ $^
5961
$(RANLIB) $@
6062

63+
larftl2.a: $(LARFTL2)
64+
$(AR) $(ARFLAGS) $@ $^
65+
$(RANLIB) $@
66+
6167
.PHONY: clean cleanobj cleanlib
6268
clean: cleanobj cleanlib
6369
cleanobj:
64-
rm -f $(CHOLRL) $(CHOLTOP) $(LUCR) $(LULL) $(LUREC) $(QRLL)
70+
rm -f $(CHOLRL) $(CHOLTOP) $(LUCR) $(LULL) $(LUREC) $(QRLL) $(LARFTL2)
6571
cleanlib:
6672
rm -f *.a

SRC/VARIANTS/README

+2
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ This directory contains several variants of LAPACK routines in single/double/com
2323
- [sdcz]geqrf with QR Left Looking Level 3 BLAS version algorithm [2]- Directory: SRC/VARIANTS/qr/LL
2424
- [sdcz]potrf with Cholesky Right Looking Level 3 BLAS version algorithm [2]- Directory: SRC/VARIANTS/cholesky/RL
2525
- [sdcz]potrf with Cholesky Top Level 3 BLAS version algorithm [2]- Directory: SRC/VARIANTS/cholesky/TOP
26+
- [sdcz]larft using a Left Looking Level 2 BLAS version algorithm - Directory: SRC/VARIANTS/larft/LL-LVL2
2627

2728
References:For a more detailed description please refer to
2829
- [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997),
@@ -44,6 +45,7 @@ Corresponding libraries created in SRC/VARIANTS:
4445
- QR Left Looking : qrll.a
4546
- Cholesky Right Looking : cholrl.a
4647
- Cholesky Top : choltop.a
48+
- LARFT Level 2: larftl2.a
4749

4850

4951
===========

SRC/VARIANTS/larft/LL-LVL2/clarft.f

+328
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,328 @@
1+
*> \brief \b CLARFT VARIANT: left-looking Level 2 BLAS version of the algorithm
2+
*
3+
* =========== DOCUMENTATION ===========
4+
*
5+
* Online html documentation available at
6+
* http://www.netlib.org/lapack/explore-html/
7+
*
8+
*> \htmlonly
9+
*> Download CLARFT + dependencies
10+
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/clarft.f">
11+
*> [TGZ]</a>
12+
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/clarft.f">
13+
*> [ZIP]</a>
14+
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/clarft.f">
15+
*> [TXT]</a>
16+
*> \endhtmlonly
17+
*
18+
* Definition:
19+
* ===========
20+
*
21+
* SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
22+
*
23+
* .. Scalar Arguments ..
24+
* CHARACTER DIRECT, STOREV
25+
* INTEGER K, LDT, LDV, N
26+
* ..
27+
* .. Array Arguments ..
28+
* COMPLEX T( LDT, * ), TAU( * ), V( LDV, * )
29+
* ..
30+
*
31+
*
32+
*> \par Purpose:
33+
* =============
34+
*>
35+
*> \verbatim
36+
*>
37+
*> CLARFT forms the triangular factor T of a complex block reflector H
38+
*> of order n, which is defined as a product of k elementary reflectors.
39+
*>
40+
*> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
41+
*>
42+
*> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.
43+
*>
44+
*> If STOREV = 'C', the vector which defines the elementary reflector
45+
*> H(i) is stored in the i-th column of the array V, and
46+
*>
47+
*> H = I - V * T * V**H
48+
*>
49+
*> If STOREV = 'R', the vector which defines the elementary reflector
50+
*> H(i) is stored in the i-th row of the array V, and
51+
*>
52+
*> H = I - V**H * T * V
53+
*> \endverbatim
54+
*
55+
* Arguments:
56+
* ==========
57+
*
58+
*> \param[in] DIRECT
59+
*> \verbatim
60+
*> DIRECT is CHARACTER*1
61+
*> Specifies the order in which the elementary reflectors are
62+
*> multiplied to form the block reflector:
63+
*> = 'F': H = H(1) H(2) . . . H(k) (Forward)
64+
*> = 'B': H = H(k) . . . H(2) H(1) (Backward)
65+
*> \endverbatim
66+
*>
67+
*> \param[in] STOREV
68+
*> \verbatim
69+
*> STOREV is CHARACTER*1
70+
*> Specifies how the vectors which define the elementary
71+
*> reflectors are stored (see also Further Details):
72+
*> = 'C': columnwise
73+
*> = 'R': rowwise
74+
*> \endverbatim
75+
*>
76+
*> \param[in] N
77+
*> \verbatim
78+
*> N is INTEGER
79+
*> The order of the block reflector H. N >= 0.
80+
*> \endverbatim
81+
*>
82+
*> \param[in] K
83+
*> \verbatim
84+
*> K is INTEGER
85+
*> The order of the triangular factor T (= the number of
86+
*> elementary reflectors). K >= 1.
87+
*> \endverbatim
88+
*>
89+
*> \param[in] V
90+
*> \verbatim
91+
*> V is COMPLEX array, dimension
92+
*> (LDV,K) if STOREV = 'C'
93+
*> (LDV,N) if STOREV = 'R'
94+
*> The matrix V. See further details.
95+
*> \endverbatim
96+
*>
97+
*> \param[in] LDV
98+
*> \verbatim
99+
*> LDV is INTEGER
100+
*> The leading dimension of the array V.
101+
*> If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K.
102+
*> \endverbatim
103+
*>
104+
*> \param[in] TAU
105+
*> \verbatim
106+
*> TAU is COMPLEX array, dimension (K)
107+
*> TAU(i) must contain the scalar factor of the elementary
108+
*> reflector H(i).
109+
*> \endverbatim
110+
*>
111+
*> \param[out] T
112+
*> \verbatim
113+
*> T is COMPLEX array, dimension (LDT,K)
114+
*> The k by k triangular factor T of the block reflector.
115+
*> If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is
116+
*> lower triangular. The rest of the array is not used.
117+
*> \endverbatim
118+
*>
119+
*> \param[in] LDT
120+
*> \verbatim
121+
*> LDT is INTEGER
122+
*> The leading dimension of the array T. LDT >= K.
123+
*> \endverbatim
124+
*
125+
* Authors:
126+
* ========
127+
*
128+
*> \author Univ. of Tennessee
129+
*> \author Univ. of California Berkeley
130+
*> \author Univ. of Colorado Denver
131+
*> \author NAG Ltd.
132+
*
133+
*> \ingroup larft
134+
*
135+
*> \par Further Details:
136+
* =====================
137+
*>
138+
*> \verbatim
139+
*>
140+
*> The shape of the matrix V and the storage of the vectors which define
141+
*> the H(i) is best illustrated by the following example with n = 5 and
142+
*> k = 3. The elements equal to 1 are not stored.
143+
*>
144+
*> DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R':
145+
*>
146+
*> V = ( 1 ) V = ( 1 v1 v1 v1 v1 )
147+
*> ( v1 1 ) ( 1 v2 v2 v2 )
148+
*> ( v1 v2 1 ) ( 1 v3 v3 )
149+
*> ( v1 v2 v3 )
150+
*> ( v1 v2 v3 )
151+
*>
152+
*> DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R':
153+
*>
154+
*> V = ( v1 v2 v3 ) V = ( v1 v1 1 )
155+
*> ( v1 v2 v3 ) ( v2 v2 v2 1 )
156+
*> ( 1 v2 v3 ) ( v3 v3 v3 v3 1 )
157+
*> ( 1 v3 )
158+
*> ( 1 )
159+
*> \endverbatim
160+
*>
161+
* =====================================================================
162+
SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
163+
*
164+
* -- LAPACK auxiliary routine --
165+
* -- LAPACK is a software package provided by Univ. of Tennessee, --
166+
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
167+
*
168+
* .. Scalar Arguments ..
169+
CHARACTER DIRECT, STOREV
170+
INTEGER K, LDT, LDV, N
171+
* ..
172+
* .. Array Arguments ..
173+
COMPLEX T( LDT, * ), TAU( * ), V( LDV, * )
174+
* ..
175+
*
176+
* =====================================================================
177+
*
178+
* .. Parameters ..
179+
COMPLEX ONE, ZERO
180+
PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ),
181+
$ ZERO = ( 0.0E+0, 0.0E+0 ) )
182+
* ..
183+
* .. Local Scalars ..
184+
INTEGER I, J, PREVLASTV, LASTV
185+
* ..
186+
* .. External Subroutines ..
187+
EXTERNAL CGEMM, CGEMV, CTRMV
188+
* ..
189+
* .. External Functions ..
190+
LOGICAL LSAME
191+
EXTERNAL LSAME
192+
* ..
193+
* .. Executable Statements ..
194+
*
195+
* Quick return if possible
196+
*
197+
IF( N.EQ.0 )
198+
$ RETURN
199+
*
200+
IF( LSAME( DIRECT, 'F' ) ) THEN
201+
PREVLASTV = N
202+
DO I = 1, K
203+
PREVLASTV = MAX( PREVLASTV, I )
204+
IF( TAU( I ).EQ.ZERO ) THEN
205+
*
206+
* H(i) = I
207+
*
208+
DO J = 1, I
209+
T( J, I ) = ZERO
210+
END DO
211+
ELSE
212+
*
213+
* general case
214+
*
215+
IF( LSAME( STOREV, 'C' ) ) THEN
216+
* Skip any trailing zeros.
217+
DO LASTV = N, I+1, -1
218+
IF( V( LASTV, I ).NE.ZERO ) EXIT
219+
END DO
220+
DO J = 1, I-1
221+
T( J, I ) = -TAU( I ) * CONJG( V( I , J ) )
222+
END DO
223+
J = MIN( LASTV, PREVLASTV )
224+
*
225+
* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**H * V(i:j,i)
226+
*
227+
CALL CGEMV( 'Conjugate transpose', J-I, I-1,
228+
$ -TAU( I ), V( I+1, 1 ), LDV,
229+
$ V( I+1, I ), 1,
230+
$ ONE, T( 1, I ), 1 )
231+
ELSE
232+
* Skip any trailing zeros.
233+
DO LASTV = N, I+1, -1
234+
IF( V( I, LASTV ).NE.ZERO ) EXIT
235+
END DO
236+
DO J = 1, I-1
237+
T( J, I ) = -TAU( I ) * V( J , I )
238+
END DO
239+
J = MIN( LASTV, PREVLASTV )
240+
*
241+
* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**H
242+
*
243+
CALL CGEMM( 'N', 'C', I-1, 1, J-I, -TAU( I ),
244+
$ V( 1, I+1 ), LDV, V( I, I+1 ), LDV,
245+
$ ONE, T( 1, I ), LDT )
246+
END IF
247+
*
248+
* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i)
249+
*
250+
CALL CTRMV( 'Upper', 'No transpose', 'Non-unit', I-1,
251+
$ T,
252+
$ LDT, T( 1, I ), 1 )
253+
T( I, I ) = TAU( I )
254+
IF( I.GT.1 ) THEN
255+
PREVLASTV = MAX( PREVLASTV, LASTV )
256+
ELSE
257+
PREVLASTV = LASTV
258+
END IF
259+
END IF
260+
END DO
261+
ELSE
262+
PREVLASTV = 1
263+
DO I = K, 1, -1
264+
IF( TAU( I ).EQ.ZERO ) THEN
265+
*
266+
* H(i) = I
267+
*
268+
DO J = I, K
269+
T( J, I ) = ZERO
270+
END DO
271+
ELSE
272+
*
273+
* general case
274+
*
275+
IF( I.LT.K ) THEN
276+
IF( LSAME( STOREV, 'C' ) ) THEN
277+
* Skip any leading zeros.
278+
DO LASTV = 1, I-1
279+
IF( V( LASTV, I ).NE.ZERO ) EXIT
280+
END DO
281+
DO J = I+1, K
282+
T( J, I ) = -TAU( I ) * CONJG( V( N-K+I , J ) )
283+
END DO
284+
J = MAX( LASTV, PREVLASTV )
285+
*
286+
* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**H * V(j:n-k+i,i)
287+
*
288+
CALL CGEMV( 'Conjugate transpose', N-K+I-J, K-I,
289+
$ -TAU( I ), V( J, I+1 ), LDV, V( J, I ),
290+
$ 1, ONE, T( I+1, I ), 1 )
291+
ELSE
292+
* Skip any leading zeros.
293+
DO LASTV = 1, I-1
294+
IF( V( I, LASTV ).NE.ZERO ) EXIT
295+
END DO
296+
DO J = I+1, K
297+
T( J, I ) = -TAU( I ) * V( J, N-K+I )
298+
END DO
299+
J = MAX( LASTV, PREVLASTV )
300+
*
301+
* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**H
302+
*
303+
CALL CGEMM( 'N', 'C', K-I, 1, N-K+I-J,
304+
$ -TAU( I ),
305+
$ V( I+1, J ), LDV, V( I, J ), LDV,
306+
$ ONE, T( I+1, I ), LDT )
307+
END IF
308+
*
309+
* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i)
310+
*
311+
CALL CTRMV( 'Lower', 'No transpose', 'Non-unit',
312+
$ K-I,
313+
$ T( I+1, I+1 ), LDT, T( I+1, I ), 1 )
314+
IF( I.GT.1 ) THEN
315+
PREVLASTV = MIN( PREVLASTV, LASTV )
316+
ELSE
317+
PREVLASTV = LASTV
318+
END IF
319+
END IF
320+
T( I, I ) = TAU( I )
321+
END IF
322+
END DO
323+
END IF
324+
RETURN
325+
*
326+
* End of CLARFT
327+
*
328+
END

0 commit comments

Comments
 (0)