@@ -58,6 +58,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
5858#define VI3 $vr8
5959#define VI4 $vr19
6060#define VT0 $vr23
61+ #define VMASK $vr7
6162
6263 PROLOGUE
6364 li.d i0, 0
@@ -76,6 +77,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
7677 li.d I, -1
7778 vreplgr2vr.d VI4, I
7879 vffint.d.l VI4, VI4 // -1
80+ li.d I, 0x7fffffffffffffff // Every bit set except bit 63: AND-ing with this clears the sign bit of each 64-bit lane, giving |x|
81+ vreplgr2vr.d VMASK, I
7982 bne INCX, TEMP, .L20 // incx != 1
8083
8184 // Init Index
@@ -99,17 +102,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
99102 vld VX1, X, 2 * SIZE
100103 vpickev.d x1, VX1, VX0
101104 vpickod.d x2, VX1, VX0
102- vfmul.d x3, VI4, x1
103- vfmul.d x4, VI4, x2
104- vfcmp.clt.d VT0, x1, VI3
105- vfcmp.clt.d VINC8, x2, VI3
106- vbitsel.v x1, x1, x3, VT0
107- vbitsel.v x2, x2, x4, VINC8
105+ vand.v x1, x1, VMASK
106+ vand.v x2, x2, VMASK
108107 vfadd.d VM0, x1, x2
109108#else
110109 li.w I, -1
111110 vreplgr2vr.w VI4, I
112111 vffint.s.w VI4, VI4 // -1
112+ li.d I, 0x7fffffff // Every bit set except bit 31: AND-ing with this clears the sign bit of each 32-bit lane, giving |x|
113+ vreplgr2vr.w VMASK, I
113114 bne INCX, TEMP, .L20 // incx != 1
114115
115116 // Init Index
@@ -141,12 +142,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
141142 vld VX1, X, 4 * SIZE
142143 vpickev.w x1, VX1, VX0
143144 vpickod.w x2, VX1, VX0
144- vfmul.s x3, VI4, x1
145- vfmul.s x4, VI4, x2
146- vfcmp.clt.s VT0, x1, VI3
147- vfcmp.clt.s VINC8, x2, VI3
148- vbitsel.v x1, x1, x3, VT0
149- vbitsel.v x2, x2, x4, VINC8
145+ vand.v x1, x1, VMASK
146+ vand.v x2, x2, VMASK
150147 vfadd.s VM0, x1, x2
151148#endif
152149 .align 3
@@ -159,12 +156,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
159156 addi.d I, I, -1
160157 vpickev.d x1, VX1, VX0
161158 vpickod.d x2, VX1, VX0
162- vfmul.d x3, VI4, x1
163- vfmul.d x4, VI4, x2
164- vfcmp.clt.d VT0, x1, VI3
165- vfcmp.clt.d VINC8, x2, VI3
166- vbitsel.v x1, x1, x3, VT0
167- vbitsel.v x2, x2, x4, VINC8
159+ vand.v x1, x1, VMASK
160+ vand.v x2, x2, VMASK
168161 vfadd.d x1, x1, x2
169162 vfmin.d x3, VM0, x1
170163 vfcmp.ceq.d VT0, x3, VM0
@@ -183,12 +176,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
183176 vpickev.w x1, VX1, VX0
184177 vpickod.w x2, VX1, VX0
185178#endif
186- VFMUL x3, VI4, x1
187- VFMUL x4, VI4, x2
188- VCMPLT VT0, x1, VI3
189- VCMPLT VINC8, x2, VI3
190- vbitsel.v x1, x1, x3, VT0
191- vbitsel.v x2, x2, x4, VINC8
179+ vand.v x1, x1, VMASK
180+ vand.v x2, x2, VMASK
192181 VFADD x1, x1, x2
193182 VFMIN x3, VM0, x1
194183 VCMPEQ VT0, x3, VM0
@@ -264,12 +253,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
264253 vinsgr2vr.d x2, t2, 0
265254 vinsgr2vr.d x1, t3, 1
266255 vinsgr2vr.d x2, t4, 1
267- vfmul.d x3, VI4, x1
268- vfmul.d x4, VI4, x2
269- vfcmp.clt.d VT0, x1, VI3
270- vfcmp.clt.d VINC8, x2, VI3
271- vbitsel.v x1, x1, x3, VT0
272- vbitsel.v x2, x2, x4, VINC8
256+ vand.v x1, x1, VMASK
257+ vand.v x2, x2, VMASK
273258 vfadd.d VM0, x1, x2
274259#else
275260 addi.w i0, i0, 1
@@ -339,12 +324,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
339324 vinsgr2vr.d x1, t3, 1
340325 vinsgr2vr.d x2, t4, 1
341326 vadd.d VI1, VI1, VINC4
342- vfmul.d x3, VI4, x1
343- vfmul.d x4, VI4, x2
344- vfcmp.clt.d VT0, x1, VI3
345- vfcmp.clt.d VINC8, x2, VI3
346- vbitsel.v x1, x1, x3, VT0
347- vbitsel.v x2, x2, x4, VINC8
327+ vand.v x1, x1, VMASK
328+ vand.v x2, x2, VMASK
348329 vfadd.d x1, x1, x2
349330 vfmin.d x3, VM0, x1
350331 ld.d t1, X, 0 * SIZE
@@ -385,12 +366,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
385366 vinsgr2vr.w x2, t4, 3
386367#endif
387368 addi.d I, I, -1
388- VFMUL x3, VI4, x1
389- VFMUL x4, VI4, x2
390- VCMPLT VT0, x1, VI3
391- VCMPLT VINC8, x2, VI3
392- vbitsel.v x1, x1, x3, VT0
393- vbitsel.v x2, x2, x4, VINC8
369+ vand.v x1, x1, VMASK
370+ vand.v x2, x2, VMASK
394371 VFADD x1, x1, x2
395372 VFMIN x3, VM0, x1
396373 VCMPEQ VT0, x3, VM0
0 commit comments