Skip to content

Commit 50134e1

Browse files
authored
Merge pull request #1961 from folkertdev/pmadd-correct-signedness
correct signedness of pmadd arguments
2 parents cb32437 + 2a98dca commit 50134e1

File tree

2 files changed

+4
-4
lines changed

2 files changed

+4
-4
lines changed

crates/core_arch/src/x86/avx2.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1773,7 +1773,7 @@ pub fn _mm256_madd_epi16(a: __m256i, b: __m256i) -> __m256i {
17731773
#[cfg_attr(test, assert_instr(vpmaddubsw))]
17741774
#[stable(feature = "simd_x86", since = "1.27.0")]
17751775
pub fn _mm256_maddubs_epi16(a: __m256i, b: __m256i) -> __m256i {
1776-
unsafe { transmute(pmaddubsw(a.as_u8x32(), b.as_u8x32())) }
1776+
unsafe { transmute(pmaddubsw(a.as_u8x32(), b.as_i8x32())) }
17771777
}
17781778

17791779
/// Loads packed 32-bit integers from memory pointed to by `mem_addr` using `mask`
@@ -3702,7 +3702,7 @@ unsafe extern "C" {
37023702
#[link_name = "llvm.x86.avx2.phsub.sw"]
37033703
fn phsubsw(a: i16x16, b: i16x16) -> i16x16;
37043704
#[link_name = "llvm.x86.avx2.pmadd.ub.sw"]
3705-
fn pmaddubsw(a: u8x32, b: u8x32) -> i16x16;
3705+
fn pmaddubsw(a: u8x32, b: i8x32) -> i16x16;
37063706
#[link_name = "llvm.x86.avx2.mpsadbw"]
37073707
fn mpsadbw(a: u8x32, b: u8x32, imm8: i8) -> u16x16;
37083708
#[link_name = "llvm.x86.avx2.pmul.hr.sw"]

crates/core_arch/src/x86/avx512bw.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5955,7 +5955,7 @@ pub fn _mm_maskz_madd_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
59555955
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
59565956
#[cfg_attr(test, assert_instr(vpmaddubsw))]
59575957
pub fn _mm512_maddubs_epi16(a: __m512i, b: __m512i) -> __m512i {
5958-
unsafe { transmute(vpmaddubsw(a.as_i8x64(), b.as_i8x64())) }
5958+
unsafe { transmute(vpmaddubsw(a.as_u8x64(), b.as_i8x64())) }
59595959
}
59605960

59615961
/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -11688,7 +11688,7 @@ unsafe extern "C" {
1168811688
fn vpmulhrsw(a: i16x32, b: i16x32) -> i16x32;
1168911689

1169011690
#[link_name = "llvm.x86.avx512.pmaddubs.w.512"]
11691-
fn vpmaddubsw(a: i8x64, b: i8x64) -> i16x32;
11691+
fn vpmaddubsw(a: u8x64, b: i8x64) -> i16x32;
1169211692

1169311693
#[link_name = "llvm.x86.avx512.packssdw.512"]
1169411694
fn vpackssdw(a: i32x16, b: i32x16) -> i16x32;

0 commit comments

Comments
 (0)