@@ -525,7 +525,7 @@ void ggml_gemv_q4_K_8x8_q8_K(int n,
525525 UNUSED (ncols_interleaved);
526526 UNUSED (blocklen);
527527
528- #if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__) && defined(__ARM_NEON) && defined(__ARM_FEATURE_DOTPROD)
528+ #if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__) && defined(__ARM_NEON)
529529 constexpr int col_pairs = ncols_interleaved / 2 ;
530530 const uint8x16_t m4b = vdupq_n_u8 (0x0f );
531531
@@ -596,15 +596,15 @@ void ggml_gemv_q4_K_8x8_q8_K(int n,
596596 uint8x16_t q4_qs_cp_2 = vld1q_u8 (q4_base + 16 * cp + 128 );
597597 uint8x16_t q4_qs_cp_3 = vld1q_u8 (q4_base + 16 * cp + 192 );
598598
599- acc_lo[cp] = vdotq_s32 (acc_lo[cp], vreinterpretq_s8_u8 (vandq_u8 (q4_qs_cp_0, m4b)), q8_qs[0 ]); // 0 .. 7
600- acc_lo[cp] = vdotq_s32 (acc_lo[cp], vreinterpretq_s8_u8 (vandq_u8 (q4_qs_cp_1, m4b)), q8_qs[1 ]); // 8 ..15
601- acc_lo[cp] = vdotq_s32 (acc_lo[cp], vreinterpretq_s8_u8 (vandq_u8 (q4_qs_cp_2, m4b)), q8_qs[2 ]); // 16..23
602- acc_lo[cp] = vdotq_s32 (acc_lo[cp], vreinterpretq_s8_u8 (vandq_u8 (q4_qs_cp_3, m4b)), q8_qs[3 ]); // 24..31
599+ acc_lo[cp] = ggml_vdotq_s32 (acc_lo[cp], vreinterpretq_s8_u8 (vandq_u8 (q4_qs_cp_0, m4b)), q8_qs[0 ]); // 0 .. 7
600+ acc_lo[cp] = ggml_vdotq_s32 (acc_lo[cp], vreinterpretq_s8_u8 (vandq_u8 (q4_qs_cp_1, m4b)), q8_qs[1 ]); // 8 ..15
601+ acc_lo[cp] = ggml_vdotq_s32 (acc_lo[cp], vreinterpretq_s8_u8 (vandq_u8 (q4_qs_cp_2, m4b)), q8_qs[2 ]); // 16..23
602+ acc_lo[cp] = ggml_vdotq_s32 (acc_lo[cp], vreinterpretq_s8_u8 (vandq_u8 (q4_qs_cp_3, m4b)), q8_qs[3 ]); // 24..31
603603
604- acc_hi[cp] = vdotq_s32 (acc_hi[cp], vreinterpretq_s8_u8 (vshrq_n_u8 (q4_qs_cp_0, 4 )), q8_qs[4 ]); // 32..39
605- acc_hi[cp] = vdotq_s32 (acc_hi[cp], vreinterpretq_s8_u8 (vshrq_n_u8 (q4_qs_cp_1, 4 )), q8_qs[5 ]); // 40..47
606- acc_hi[cp] = vdotq_s32 (acc_hi[cp], vreinterpretq_s8_u8 (vshrq_n_u8 (q4_qs_cp_2, 4 )), q8_qs[6 ]); // 48..55
607- acc_hi[cp] = vdotq_s32 (acc_hi[cp], vreinterpretq_s8_u8 (vshrq_n_u8 (q4_qs_cp_3, 4 )), q8_qs[7 ]); // 56..63
604+ acc_hi[cp] = ggml_vdotq_s32 (acc_hi[cp], vreinterpretq_s8_u8 (vshrq_n_u8 (q4_qs_cp_0, 4 )), q8_qs[4 ]); // 32..39
605+ acc_hi[cp] = ggml_vdotq_s32 (acc_hi[cp], vreinterpretq_s8_u8 (vshrq_n_u8 (q4_qs_cp_1, 4 )), q8_qs[5 ]); // 40..47
606+ acc_hi[cp] = ggml_vdotq_s32 (acc_hi[cp], vreinterpretq_s8_u8 (vshrq_n_u8 (q4_qs_cp_2, 4 )), q8_qs[6 ]); // 48..55
607+ acc_hi[cp] = ggml_vdotq_s32 (acc_hi[cp], vreinterpretq_s8_u8 (vshrq_n_u8 (q4_qs_cp_3, 4 )), q8_qs[7 ]); // 56..63
608608 }
609609
610610
@@ -652,7 +652,7 @@ void ggml_gemv_q4_K_8x8_q8_K(int n,
652652 vst1q_f32 (s + base + 4 , acc_f32[1 ]);
653653 } // for x
654654 return ;
655- #endif
655+ #endif // #if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__) && defined(__ARM_NEON)
656656 ggml_gemv_q4_K_8x8_q8_K_generic (n, s, bs, vx, vy, nr, nc);
657657}
658658
0 commit comments