Skip to content

Commit 4e2b611

Browse files
msvc improvement
1 parent 45831bd commit 4e2b611

1 file changed

Lines changed: 6 additions & 3 deletions

File tree

csrc/cpu_ops.cpp

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -263,7 +263,7 @@ static inline __m512 set_fp4_lut() {
263263
// DATA_TYPE: 1 = FP4, 2 = NF4
264264
template <typename T, int DATA_TYPE>
265265
void dequantizeBlockwise4bitCpu(
266-
unsigned char* __restrict A, const float* __restrict absmax, T* __restrict out, long long blocksize, long long m, long long n
266+
unsigned char* A, const float* absmax, T* out, long long blocksize, long long m, long long n
267267
) {
268268
static_assert(DATA_TYPE == 1 || DATA_TYPE == 2, "dequantizeBlockwise4bitCpu called with non 4-bit DATA_TYPE");
269269
if (blocksize <= 0 || m < 0 || n <= 0)
@@ -408,7 +408,7 @@ void dequantizeBlockwise4bitCpu(
408408

409409
template <typename T>
410410
void dequantizeBlockwise8bitCpu(
411-
float* __restrict code, unsigned char* __restrict A, const float* __restrict absmax, T* __restrict out, long long blocksize, long long n
411+
float* code, unsigned char* A, const float* absmax, T* out, long long blocksize, long long n
412412
) {
413413
if (blocksize <= 0 || n <= 0)
414414
return;
@@ -418,6 +418,9 @@ void dequantizeBlockwise8bitCpu(
418418
long long valid_items = (n - block_idx >= blocksize ? blocksize : n - block_idx);
419419
long long block_end = block_idx + valid_items;
420420
float scale = absmax[block_idx / blocksize];
421+
#ifdef _MSC_VER
422+
#pragma loop(ivdep)
423+
#endif
421424
for (long long i = block_idx; i < block_end; ++i) {
422425
float v = code[A[i]] * scale;
423426
if constexpr (std::is_same<T, bf16_t>::value) {
@@ -518,7 +521,7 @@ static inline uint16_t norm_to_lut_index(float val) {
518521
}
519522

520523
template <typename T>
521-
void quantize_cpu_impl(float* __restrict code, const T* __restrict A, float* __restrict absmax, unsigned char* __restrict out, long long blocksize, long long n) {
524+
void quantize_cpu_impl(float* code, const T* A, float* absmax, unsigned char* out, long long blocksize, long long n) {
522525
if (blocksize <= 0 || n <= 0)
523526
return;
524527

0 commit comments

Comments
 (0)