@@ -9025,7 +9025,7 @@ void ggml_vec_dot_iq2_s_q8_K(int n, float * restrict s, size_t bs, const void *
90259025                                     vld1_s8 ((const  int8_t  * )(iq2s_grid  +  (qs [7 ] | ((qh [ib32 + 1 ] << 2 ) &  0x300 )))));
90269026            qs  +=  8 ;
90279027
9028-             vs .val [0 ] =  vreinterpretq_u8_u32 (vdupq_n_u32 (signs [0 ] | (signs [1 ] << 16 )));
9028+             vs .val [0 ] =  vreinterpretq_u8_u32 (vdupq_n_u32 (signs [0 ] | (( uint32_t )  signs [1 ] << 16 )));
90299029            vs .val [1 ] =  vandq_u8 (ggml_vqtbl1q_u8 (vs .val [0 ], mask1 .val [1 ]), mask2 );
90309030            vs .val [0 ] =  vandq_u8 (ggml_vqtbl1q_u8 (vs .val [0 ], mask1 .val [0 ]), mask2 );
90319031            vs .val [0 ] =  vceqq_u8 (vs .val [0 ], mask2 );
@@ -9034,7 +9034,7 @@ void ggml_vec_dot_iq2_s_q8_K(int n, float * restrict s, size_t bs, const void *
90349034            q2s .val [0 ] =  vmulq_s8 (vreinterpretq_s8_u8 (vorrq_u8 (vs .val [0 ], m1 )), q2s .val [0 ]);
90359035            q2s .val [1 ] =  vmulq_s8 (vreinterpretq_s8_u8 (vorrq_u8 (vs .val [1 ], m1 )), q2s .val [1 ]);
90369036
9037-             vs .val [0 ] =  vreinterpretq_u8_u32 (vdupq_n_u32 (signs [2 ] | (signs [3 ] << 16 )));
9037+             vs .val [0 ] =  vreinterpretq_u8_u32 (vdupq_n_u32 (signs [2 ] | (( uint32_t )  signs [3 ] << 16 )));
90389038            vs .val [1 ] =  vandq_u8 (ggml_vqtbl1q_u8 (vs .val [0 ], mask1 .val [1 ]), mask2 );
90399039            vs .val [0 ] =  vandq_u8 (ggml_vqtbl1q_u8 (vs .val [0 ], mask1 .val [0 ]), mask2 );
90409040            vs .val [0 ] =  vceqq_u8 (vs .val [0 ], mask2 );
@@ -9105,12 +9105,12 @@ void ggml_vec_dot_iq2_s_q8_K(int n, float * restrict s, size_t bs, const void *
91059105                                                   iq2s_grid [qs [4 ] | ((qh [ib32 + 1 ] << 8 ) &  0x300 )]);
91069106            qs  +=  8 ;
91079107
9108-             __m256i  aux256  =  _mm256_set1_epi32 (signs [0 ] | (signs [1 ] << 16 ));
9108+             __m256i  aux256  =  _mm256_set1_epi32 (signs [0 ] | (( uint32_t )  signs [1 ] << 16 ));
91099109            aux256  =  _mm256_and_si256 (_mm256_shuffle_epi8 (aux256 ,mask1 ), mask2 );
91109110            const  __m256i  s2_1  =  _mm256_cmpeq_epi8 (aux256 , mask2 );
91119111            const  __m256i  q8s_1  =  _mm256_sub_epi8 (_mm256_xor_si256 (s2_1 , q8_1 ), s2_1 );
91129112
9113-             aux256  =  _mm256_set1_epi32 (signs [2 ] | (signs [3 ] << 16 ));
9113+             aux256  =  _mm256_set1_epi32 (signs [2 ] | (( uint32_t )  signs [3 ] << 16 ));
91149114            aux256  =  _mm256_and_si256 (_mm256_shuffle_epi8 (aux256 ,mask1 ), mask2 );
91159115            const  __m256i  s2_2  =  _mm256_cmpeq_epi8 (aux256 , mask2 );
91169116            const  __m256i  q8s_2  =  _mm256_sub_epi8 (_mm256_xor_si256 (s2_2 , q8_2 ), s2_2 );
@@ -9386,7 +9386,7 @@ void ggml_vec_dot_iq3_s_q8_K (int n, float * restrict s, size_t bs, const void *
93869386                                                        iq3s_grid [idx .index [6 ]], iq3s_grid [idx .index [7 ]]);
93879387
93889388
9389-             vs .val [0 ] =  vreinterpretq_u8_u32 (vdupq_n_u32 (signs [0 ] | (signs [1 ] << 16 )));
9389+             vs .val [0 ] =  vreinterpretq_u8_u32 (vdupq_n_u32 (signs [0 ] | (( uint32_t )  signs [1 ] << 16 )));
93909390            vs .val [1 ] =  vandq_u8 (ggml_vqtbl1q_u8 (vs .val [0 ], mask1 .val [1 ]), mask2 );
93919391            vs .val [0 ] =  vandq_u8 (ggml_vqtbl1q_u8 (vs .val [0 ], mask1 .val [0 ]), mask2 );
93929392            vs .val [0 ] =  vorrq_u8 (vceqq_u8 (vs .val [0 ], mask2 ), m1 );
@@ -9395,7 +9395,7 @@ void ggml_vec_dot_iq3_s_q8_K (int n, float * restrict s, size_t bs, const void *
93959395            q3s .val [0 ] =  vmulq_s8 (vreinterpretq_s8_u8 (vs .val [0 ]), vreinterpretq_s8_u32 (aux32x4_0 ));
93969396            q3s .val [1 ] =  vmulq_s8 (vreinterpretq_s8_u8 (vs .val [1 ]), vreinterpretq_s8_u32 (aux32x4_1 ));
93979397
9398-             vs .val [0 ] =  vreinterpretq_u8_u32 (vdupq_n_u32 (signs [2 ] | (signs [3 ] << 16 )));
9398+             vs .val [0 ] =  vreinterpretq_u8_u32 (vdupq_n_u32 (signs [2 ] | (( uint32_t )  signs [3 ] << 16 )));
93999399            vs .val [1 ] =  vandq_u8 (ggml_vqtbl1q_u8 (vs .val [0 ], mask1 .val [1 ]), mask2 );
94009400            vs .val [0 ] =  vandq_u8 (ggml_vqtbl1q_u8 (vs .val [0 ], mask1 .val [0 ]), mask2 );
94019401            vs .val [0 ] =  vorrq_u8 (vceqq_u8 (vs .val [0 ], mask2 ), m1 );
0 commit comments