Skip to content

Commit fc9bcef

Browse files
Data alignment fix for SSIM.
WebRtc_UWord64[2] wasn't always aligned to 128 bits (16 bytes), which is necessary for _mm_store_si128. By declaring the variable as __m128i it will always be 16-byte aligned. Also fixes incorrect include files: __m128i is defined in emmintrin.h for Visual Studio; the extra include on Mac and Linux is not a problem. Review URL: http://webrtc-codereview.appspot.com/239013 git-svn-id: http://webrtc.googlecode.com/svn/trunk@816 4adac7df-926f-26a2-2b94-8c16560cd09d
1 parent 78c767f commit fc9bcef

File tree

1 file changed

+22
-10
lines changed

1 file changed

+22
-10
lines changed

src/modules/video_coding/main/test/video_metrics.cc

+22-10
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,7 @@ Ssim8x8C(WebRtc_UWord8 *s, WebRtc_Word32 sp,
164164
}
165165

166166
#if defined(WEBRTC_USE_SSE2)
167+
#include <emmintrin.h>
167168
#include <xmmintrin.h>
168169
static double
169170
Ssim8x8Sse2(WebRtc_UWord8 *s, WebRtc_Word32 sp,
@@ -200,28 +201,39 @@ Ssim8x8Sse2(WebRtc_UWord8 *s, WebRtc_Word32 sp,
200201
const __m128i sum_r_32 = _mm_add_epi32(_mm_unpackhi_epi16(sum_r_16, z),
201202
_mm_unpacklo_epi16(sum_r_16, z));
202203

203-
WebRtc_UWord64 sum_s_64[2];
204-
WebRtc_UWord64 sum_r_64[2];
205-
WebRtc_UWord64 sum_sq_s_64[2];
206-
WebRtc_UWord64 sum_sq_r_64[2];
207-
WebRtc_UWord64 sum_sxr_64[2];
204+
__m128i sum_s_128;
205+
__m128i sum_r_128;
206+
__m128i sum_sq_s_128;
207+
__m128i sum_sq_r_128;
208+
__m128i sum_sxr_128;
208209

209-
_mm_store_si128 ((__m128i*)sum_s_64,
210+
_mm_store_si128 (&sum_s_128,
210211
_mm_add_epi64(_mm_unpackhi_epi32(sum_s_32, z),
211212
_mm_unpacklo_epi32(sum_s_32, z)));
212-
_mm_store_si128 ((__m128i*)sum_r_64,
213+
_mm_store_si128 (&sum_r_128,
213214
_mm_add_epi64(_mm_unpackhi_epi32(sum_r_32, z),
214215
_mm_unpacklo_epi32(sum_r_32, z)));
215-
_mm_store_si128 ((__m128i*)sum_sq_s_64,
216+
_mm_store_si128 (&sum_sq_s_128,
216217
_mm_add_epi64(_mm_unpackhi_epi32(sum_sq_s_32, z),
217218
_mm_unpacklo_epi32(sum_sq_s_32, z)));
218-
_mm_store_si128 ((__m128i*)sum_sq_r_64,
219+
_mm_store_si128 (&sum_sq_r_128,
219220
_mm_add_epi64(_mm_unpackhi_epi32(sum_sq_r_32, z),
220221
_mm_unpacklo_epi32(sum_sq_r_32, z)));
221-
_mm_store_si128 ((__m128i*)sum_sxr_64,
222+
_mm_store_si128 (&sum_sxr_128,
222223
_mm_add_epi64(_mm_unpackhi_epi32(sum_sxr_32, z),
223224
_mm_unpacklo_epi32(sum_sxr_32, z)));
224225

226+
const WebRtc_UWord64 *sum_s_64 =
227+
reinterpret_cast<WebRtc_UWord64*>(&sum_s_128);
228+
const WebRtc_UWord64 *sum_r_64 =
229+
reinterpret_cast<WebRtc_UWord64*>(&sum_r_128);
230+
const WebRtc_UWord64 *sum_sq_s_64 =
231+
reinterpret_cast<WebRtc_UWord64*>(&sum_sq_s_128);
232+
const WebRtc_UWord64 *sum_sq_r_64 =
233+
reinterpret_cast<WebRtc_UWord64*>(&sum_sq_r_128);
234+
const WebRtc_UWord64 *sum_sxr_64 =
235+
reinterpret_cast<WebRtc_UWord64*>(&sum_sxr_128);
236+
225237
const WebRtc_UWord64 sum_s = sum_s_64[0] + sum_s_64[1];
226238
const WebRtc_UWord64 sum_r = sum_r_64[0] + sum_r_64[1];
227239
const WebRtc_UWord64 sum_sq_s = sum_sq_s_64[0] + sum_sq_s_64[1];

0 commit comments

Comments
 (0)