Skip to content

Commit e34601f

Browse files
committed
arm neon: multiply mat4 with vec4
1 parent fa01a30 commit e34601f

File tree

2 files changed

+24
-0
lines changed

2 files changed

+24
-0
lines changed

include/cglm/mat4.h

+2
Original file line number | Diff line number | Diff line change
@@ -358,6 +358,8 @@ void
358358
glm_mat4_mulv(mat4 m, vec4 v, vec4 dest) {
359359
#if defined( __SSE__ ) || defined( __SSE2__ )
360360
glm_mat4_mulv_sse2(m, v, dest);
361+
#elif defined(CGLM_NEON_FP)
362+
glm_mat4_mulv_neon(m, v, dest);
361363
#else
362364
vec4 res;
363365
res[0] = m[0][0] * v[0] + m[1][0] * v[1] + m[2][0] * v[2] + m[3][0] * v[3];

include/cglm/simd/neon/mat4.h

+22
Original file line number | Diff line number | Diff line change
@@ -79,5 +79,27 @@ glm_mat4_mul_neon(mat4 m1, mat4 m2, mat4 dest) {
7979
vst1q_f32(dest[3], d3);
8080
}
8181

82+
CGLM_INLINE
83+
void
84+
glm_mat4_mulv_neon(mat4 m, vec4 v, vec4 dest) {
85+
float32x4_t l0, l1, l2, l3;
86+
float32x2_t vlo, vhi;
87+
88+
l0 = vld1q_f32(m[0]);
89+
l1 = vld1q_f32(m[1]);
90+
l2 = vld1q_f32(m[2]);
91+
l3 = vld1q_f32(m[3]);
92+
93+
vlo = vld1_f32(&v[0]);
94+
vhi = vld1_f32(&v[2]);
95+
96+
l0 = vmulq_lane_f32(l0, vlo, 0);
97+
l0 = vmlaq_lane_f32(l0, l1, vlo, 1);
98+
l0 = vmlaq_lane_f32(l0, l2, vhi, 0);
99+
l0 = vmlaq_lane_f32(l0, l3, vhi, 1);
100+
101+
vst1q_f32(dest, l0);
102+
}
103+
82104
#endif
83105
#endif /* cglm_mat4_neon_h */

0 commit comments

Comments
 (0)