From c930c4d199223543d5bc9f14dc391eaab5cf79fe Mon Sep 17 00:00:00 2001 From: jammar1 <108334558+jammar1@users.noreply.github.com> Date: Tue, 5 Nov 2024 23:58:15 +0000 Subject: [PATCH] Add first NEON SIMD opcode implementations to fast interpreter (#3859) Add some implementations of SIMD opcodes using NEON instructions. Tested using: ```wast (module (import "wasi_snapshot_preview1" "proc_exit" (func $proc_exit (param i32))) (memory (export "memory") 1) (func $assert_true (param v128) local.get 0 v128.any_true i32.eqz if unreachable end ) (func $main (export "_start") i32.const 0 i32.const 32 memory.grow drop i32.const 0 v128.const i8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 v128.store i32.const 0 v128.load v128.const i8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 i8x16.eq call $assert_true i32.const 16 v128.const i8x16 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 v128.store i32.const 16 v128.load v128.const i8x16 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 i8x16.eq call $assert_true i32.const 0 v128.load v128.const i8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 i8x16.eq call $assert_true drop i32.const 0 i32.const 1 memory.grow drop i32.const 0 i64.const 0x7F80FF017E02FE80 i64.store i32.const 0 v128.load8x8_s v128.const i16x8 127 -128 -1 1 126 2 -2 -128 i16x8.eq call $assert_true i32.const 0 i64.const 0x80FE027E01FF807F i64.store i32.const 0 v128.load8x8_u v128.const i16x8 128 254 2 126 1 255 128 127 i16x8.eq call $assert_true i32.const 0 i64.const 0x8000FFFE7FFF0001 i64.store i32.const 0 v128.load16x4_s v128.const i32x4 -32768 -2 32767 1 i32x4.eq call $assert_true i32.const 0 i64.const 0x8000FFFE7FFF0001 i64.store i32.const 0 v128.load16x4_u v128.const i32x4 32768 65534 32767 1 i32x4.eq call $assert_true i32.const 0 i64.const 0x8000000000000001 i64.store i32.const 0 v128.load32x2_s v128.const i64x2 -2147483648 1 i64x2.eq call $assert_true i32.const 0 i64.const 0x8000000000000001 i64.store i32.const 0 v128.load32x2_u v128.const i64x2 2147483648 1 i64x2.eq call 
$assert_true call $proc_exit ) ) ``` --- build-scripts/config_common.cmake | 3 + build-scripts/runtime_lib.cmake | 4 + core/config.h | 6 + core/iwasm/common/wasm_runtime_common.h | 67 ++ core/iwasm/interpreter/wasm_interp_fast.c | 1050 ++++++++++++++++++++- core/iwasm/interpreter/wasm_loader.c | 18 +- core/iwasm/interpreter/wasm_opcode.h | 4 +- core/iwasm/libraries/simde/simde.cmake | 23 + 8 files changed, 1128 insertions(+), 47 deletions(-) create mode 100644 core/iwasm/libraries/simde/simde.cmake diff --git a/build-scripts/config_common.cmake b/build-scripts/config_common.cmake index 48c5f7be4b..b6503d808d 100644 --- a/build-scripts/config_common.cmake +++ b/build-scripts/config_common.cmake @@ -290,6 +290,9 @@ endif () if (WAMR_BUILD_LIB_RATS EQUAL 1) message (" Lib rats enabled") endif() +if ((WAMR_BUILD_LIB_SIMDE EQUAL 1)) + message (" Lib simde enabled") +endif() if (WAMR_BUILD_MINI_LOADER EQUAL 1) add_definitions (-DWASM_ENABLE_MINI_LOADER=1) message (" WASM mini loader enabled") diff --git a/build-scripts/runtime_lib.cmake b/build-scripts/runtime_lib.cmake index c57cfc57af..29789d671c 100644 --- a/build-scripts/runtime_lib.cmake +++ b/build-scripts/runtime_lib.cmake @@ -155,6 +155,10 @@ if (WAMR_BUILD_LIB_RATS EQUAL 1) include (${IWASM_DIR}/libraries/lib-rats/lib_rats.cmake) endif () +if (WAMR_BUILD_LIB_SIMDE EQUAL 1) + include (${IWASM_DIR}/libraries/simde/simde.cmake) +endif () + if (WAMR_BUILD_WASM_CACHE EQUAL 1) include (${WAMR_ROOT_DIR}/build-scripts/involve_boringssl.cmake) endif () diff --git a/core/config.h b/core/config.h index 6bab4da908..7b07e9eac6 100644 --- a/core/config.h +++ b/core/config.h @@ -318,6 +318,12 @@ #define WASM_ENABLE_SIMD 0 #endif +/* Disable SIMDe (used in the fast interpreter for SIMD opcodes) +unless used elsewhere */ +#ifndef WASM_ENABLE_SIMDE +#define WASM_ENABLE_SIMDE 0 +#endif + /* GC performance profiling */ #ifndef WASM_ENABLE_GC_PERF_PROFILING #define WASM_ENABLE_GC_PERF_PROFILING 0 diff --git 
a/core/iwasm/common/wasm_runtime_common.h b/core/iwasm/common/wasm_runtime_common.h
index 8ec5ea3a50..3c4460b34b 100644
--- a/core/iwasm/common/wasm_runtime_common.h
+++ b/core/iwasm/common/wasm_runtime_common.h
@@ -73,6 +73,12 @@ STORE_U8(void *addr, uint8_t value)
     *(uint8 *)addr = value;
 }
 
+static inline void
+STORE_V128(void *addr, V128 value)
+{
+    *(V128 *)addr = value;
+}
+
 /* For LOAD opcodes */
 #define LOAD_I64(addr) (*(int64 *)(addr))
 #define LOAD_F64(addr) (*(float64 *)(addr))
@@ -80,6 +86,7 @@ STORE_U8(void *addr, uint8_t value)
 #define LOAD_U32(addr) (*(uint32 *)(addr))
 #define LOAD_I16(addr) (*(int16 *)(addr))
 #define LOAD_U16(addr) (*(uint16 *)(addr))
+#define LOAD_V128(addr) (*(V128 *)(addr))
 
 #define STORE_PTR(addr, ptr) \
     do { \
@@ -264,7 +271,79 @@ STORE_U16(void *addr, uint16_t value)
     ((uint8_t *)(addr))[0] = u.u8[0];
     ((uint8_t *)(addr))[1] = u.u8[1];
 }
+
+static inline void
+STORE_V128(void *addr, V128 value)
+{
+    uintptr_t addr_ = (uintptr_t)(addr);
+    union {
+        V128 val;
+        uint64 u64[2];
+        uint32 u32[4];
+        uint16 u16[8];
+        uint8 u8[16];
+    } u;
+
+    if ((addr_ & (uintptr_t)15) == 0) {
+        *(V128 *)addr = value;
+    }
+    else {
+        u.val = value;
+        if ((addr_ & (uintptr_t)7) == 0) {
+            ((uint64 *)(addr))[0] = u.u64[0];
+            ((uint64 *)(addr))[1] = u.u64[1];
+        }
+        else if ((addr_ & (uintptr_t)3) == 0) {
+            ((uint32 *)addr)[0] = u.u32[0];
+            ((uint32 *)addr)[1] = u.u32[1];
+            ((uint32 *)addr)[2] = u.u32[2];
+            ((uint32 *)addr)[3] = u.u32[3];
+        }
+        else {
+            /* v128 accesses may be 1- or 2-byte aligned: copy bytes */
+            uint32 i;
+            for (i = 0; i < 16; i++) {
+                ((uint8 *)addr)[i] = u.u8[i];
+            }
+        }
+    }
+}
+
 /* For LOAD opcodes */
+static inline V128
+LOAD_V128(void *addr)
+{
+    uintptr_t addr1 = (uintptr_t)addr;
+    union {
+        V128 val;
+        uint64 u64[2];
+        uint32 u32[4];
+        uint16 u16[8];
+        uint8 u8[16];
+    } u;
+    if ((addr1 & (uintptr_t)15) == 0)
+        return *(V128 *)addr;
+
+    if ((addr1 & (uintptr_t)7) == 0) {
+        u.u64[0] = ((uint64 *)addr)[0];
+        u.u64[1] = ((uint64 *)addr)[1];
+    }
+    else if ((addr1 & (uintptr_t)3) == 0) {
+        u.u32[0] = ((uint32 *)addr)[0];
+        u.u32[1] = ((uint32 *)addr)[1];
+        u.u32[2] = ((uint32 *)addr)[2];
+        u.u32[3] = ((uint32 *)addr)[3];
+    }
+    else {
+        /* v128 accesses may be 1- or 2-byte aligned: copy bytes */
+        uint32 i;
+        for (i = 0; i < 16; i++) {
+            u.u8[i] = ((uint8 *)addr)[i];
+        }
+    }
+    return u.val;
+}
+
 static inline int64
 LOAD_I64(void *addr)
 {
diff --git a/core/iwasm/interpreter/wasm_interp_fast.c b/core/iwasm/interpreter/wasm_interp_fast.c
index 73e54fca7a..09823f08c8 100644
--- a/core/iwasm/interpreter/wasm_interp_fast.c
+++ b/core/iwasm/interpreter/wasm_interp_fast.c
@@ -21,6 +21,10 @@
 #include "../common/wasm_shared_memory.h"
 #endif
 
+#if WASM_ENABLE_SIMDE != 0
+#include "simde/wasm/simd128.h"
+#endif
+
 typedef int32 CellType_I32;
 typedef int64 CellType_I64;
 typedef float32 CellType_F32;
@@ -5738,7 +5742,21 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
 #endif
                 goto call_func_from_entry;
             }
-#if WASM_ENABLE_SIMD != 0
+#if WASM_ENABLE_SIMDE != 0
+#define SIMD_V128_TO_SIMDE_V128(v) \
+    ({ \
+        bh_assert(sizeof(V128) == sizeof(simde_v128_t)); \
+        simde_v128_t result; \
+        bh_memcpy_s(&result, sizeof(simde_v128_t), &(v), sizeof(V128)); \
+        result; \
+    })
+
+#define SIMDE_V128_TO_SIMD_V128(sv, v) \
+    do { \
+        bh_assert(sizeof(V128) == sizeof(simde_v128_t)); \
+        bh_memcpy_s(&(v), sizeof(V128), &(sv), sizeof(simde_v128_t)); \
+    } while (0)
+
             HANDLE_OP(WASM_OP_SIMD_PREFIX)
             {
                 GET_OPCODE();
@@ -5746,19 +5764,129 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                 switch (opcode) {
                     /* Memory */
                     case SIMD_v128_load:
+                    {
+                        uint32 offset, addr;
+                        offset = read_uint32(
+                            frame_ip); // TODO: Check with an offset!
+                        addr = GET_OPERAND(uint32, I32, 0);
+                        frame_ip += 2;
+                        addr_ret = GET_OFFSET();
+                        CHECK_MEMORY_OVERFLOW(16);
+                        PUT_V128_TO_ADDR(frame_lp + addr_ret, LOAD_V128(maddr));
+                        break;
+                    }
+#define SIMD_LOAD_OP(op_name, simde_func, element_size, num_elements) \
+    do { \
+        uint32 offset, addr; \
+        offset = read_uint32(frame_ip); \
+        addr = GET_OPERAND(uint32, I32, 0); \
+        frame_ip += 2; \
+        addr_ret = GET_OFFSET(); \
+        /* every extending load reads exactly 64 bits of memory */ \
+        CHECK_MEMORY_OVERFLOW(8); \
+        \
+        simde_v128_t simde_result = simde_func(maddr); \
+        \
+        /* simde returns the lanes in memory (little-endian) order, */ \
+        /* which is the lane order WebAssembly specifies, so the */ \
+        /* result is stored as-is; element_size and num_elements */ \
+        /* are kept only for call-site compatibility */ \
+        V128 result; \
+        SIMDE_V128_TO_SIMD_V128(simde_result, result); \
+        \
+        PUT_V128_TO_ADDR(frame_lp + addr_ret, result); \
+        \
+        break; \
+    } while (0)
                     case SIMD_v128_load8x8_s:
+                    {
+                        SIMD_LOAD_OP(SIMD_v128_load8x8_s,
+                                     simde_wasm_i16x8_load8x8, 16, 8);
+                        break;
+                    }
                     case SIMD_v128_load8x8_u:
+                    {
+                        SIMD_LOAD_OP(SIMD_v128_load8x8_u,
+                                     simde_wasm_u16x8_load8x8, 16, 8);
+                        break;
+                    }
                     case SIMD_v128_load16x4_s:
+                    {
+                        SIMD_LOAD_OP(SIMD_v128_load16x4_s,
+                                     simde_wasm_i32x4_load16x4, 32, 4);
+                        break;
+                    }
                     case SIMD_v128_load16x4_u:
+                    {
+                        SIMD_LOAD_OP(SIMD_v128_load16x4_u,
+                                     simde_wasm_u32x4_load16x4, 32, 4);
+                        break;
+                    }
                     case SIMD_v128_load32x2_s:
+                    {
+                        SIMD_LOAD_OP(SIMD_v128_load32x2_s,
+                                     simde_wasm_i64x2_load32x2, 64, 2);
+                        break;
+                    }
                     case SIMD_v128_load32x2_u:
+                    {
+                        SIMD_LOAD_OP(SIMD_v128_load32x2_u,
+                                     simde_wasm_u64x2_load32x2, 64, 2);
+                        break;
+                    }
+#define SIMD_LOAD_SPLAT_OP(op_name, simde_func, access_bytes) \
+    do { \
+        uint32 offset, addr; \
+        offset = read_uint32(frame_ip); \
+        addr = GET_OPERAND(uint32, I32, 0); \
+        frame_ip += 2; \
+        addr_ret = GET_OFFSET(); \
+        CHECK_MEMORY_OVERFLOW(access_bytes); /* only the splatted scalar */ \
+        \
+        simde_v128_t simde_result = simde_func(maddr); \
+        \
+        V128 result; \
+        SIMDE_V128_TO_SIMD_V128(simde_result, result); \
+        \
+        PUT_V128_TO_ADDR(frame_lp + addr_ret, result); \
+    } while (0)
+
                     case SIMD_v128_load8_splat:
+                    {
+                        SIMD_LOAD_SPLAT_OP(SIMD_v128_load8_splat,
+                                           simde_wasm_v128_load8_splat, 1);
+                        break;
+                    }
                     case SIMD_v128_load16_splat:
+                    {
+                        SIMD_LOAD_SPLAT_OP(SIMD_v128_load16_splat,
+                                           simde_wasm_v128_load16_splat, 2);
+                        break;
+                    }
                     case SIMD_v128_load32_splat:
+                    {
+                        SIMD_LOAD_SPLAT_OP(SIMD_v128_load32_splat,
+                                           simde_wasm_v128_load32_splat, 4);
+                        break;
+                    }
                     case SIMD_v128_load64_splat:
+                    {
+                        SIMD_LOAD_SPLAT_OP(SIMD_v128_load64_splat,
+                                           simde_wasm_v128_load64_splat, 8);
+                        break;
+                    }
                     case SIMD_v128_store:
                     {
-                        wasm_set_exception(module, "unsupported SIMD opcode");
+                        uint32 offset, addr;
+                        V128 data;
+
+                        offset = read_uint32(frame_ip);
+                        /* operand offsets follow in pop order: value, address */
+                        data = POP_V128();
+                        addr = POP_I32();
+
+                        CHECK_MEMORY_OVERFLOW(16);
+                        STORE_V128(maddr, data);
                         break;
                     }
 
@@ -5773,25 +5901,100 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         PUT_V128_TO_ADDR(frame_lp + addr_ret, *(V128 *)orig_ip);
                         break;
                     }
+                    // TODO: Add a faster SIMD implementation
                     case SIMD_v8x16_shuffle:
-                    case SIMD_v8x16_swizzle:
                     {
-                        wasm_set_exception(module, "unsupported SIMD opcode");
+                        V128 indices;
+                        /* the loader emits the lane mask before the operands */
+                        bh_memcpy_s(&indices, sizeof(V128), frame_ip,
+                                    sizeof(V128));
+                        frame_ip += sizeof(V128);
+                        V128 v2 = POP_V128();
+                        V128 v1 = POP_V128();
+                        addr_ret = GET_OFFSET();
+
+                        V128 result;
+                        for (int i = 0; i < 16; i++) {
+                            uint8_t index = indices.i8x16[i];
+                            if (index < 16) {
+                                result.i8x16[i] = v1.i8x16[index];
+                            }
+                            else {
+                                result.i8x16[i] = v2.i8x16[index - 16];
+                            }
+                        }
+
+                        PUT_V128_TO_ADDR(frame_lp + addr_ret, result);
                         break;
                     }
+                    case SIMD_v8x16_swizzle:
+                    {
+                        V128 v2 = POP_V128();
+                        V128 v1 = POP_V128();
+                        addr_ret = GET_OFFSET();
+                        simde_v128_t simde_result = simde_wasm_i8x16_swizzle(
+                            SIMD_V128_TO_SIMDE_V128(v1),
+                            SIMD_V128_TO_SIMDE_V128(v2));
+
+                        V128 result;
+                        SIMDE_V128_TO_SIMD_V128(simde_result, result);
+                        PUT_V128_TO_ADDR(frame_lp + addr_ret, result);
+                        break;
+                    }
 
                     /* Splat */
+#define SIMD_SPLAT_OP(simde_func, pop_func, val_type) \
+    do { \
+        val_type val = 
pop_func(); \
+        addr_ret = GET_OFFSET(); \
+        \
+        simde_v128_t simde_result = simde_func(val); \
+        \
+        V128 result; \
+        SIMDE_V128_TO_SIMD_V128(simde_result, result); \
+        \
+        PUT_V128_TO_ADDR(frame_lp + addr_ret, result); \
+    } while (0)
+
+#define SIMD_SPLAT_OP_I32(simde_func) SIMD_SPLAT_OP(simde_func, POP_I32, uint32)
+#define SIMD_SPLAT_OP_I64(simde_func) SIMD_SPLAT_OP(simde_func, POP_I64, uint64)
+#define SIMD_SPLAT_OP_F32(simde_func) \
+    SIMD_SPLAT_OP(simde_func, POP_F32, float32)
+#define SIMD_SPLAT_OP_F64(simde_func) \
+    SIMD_SPLAT_OP(simde_func, POP_F64, float64)
+
                     case SIMD_i8x16_splat:
+                    {
+                        SIMD_SPLAT_OP_I32(simde_wasm_i8x16_splat);
+                        break;
+                    }
                     case SIMD_i16x8_splat:
+                    {
+                        SIMD_SPLAT_OP_I32(simde_wasm_i16x8_splat);
+                        break;
+                    }
                     case SIMD_i32x4_splat:
+                    {
+                        SIMD_SPLAT_OP_I32(simde_wasm_i32x4_splat);
+                        break;
+                    }
                     case SIMD_i64x2_splat:
+                    {
+                        SIMD_SPLAT_OP_I64(simde_wasm_i64x2_splat);
+                        break;
+                    }
                     case SIMD_f32x4_splat:
+                    {
+                        SIMD_SPLAT_OP_F32(simde_wasm_f32x4_splat);
+                        break;
+                    }
                     case SIMD_f64x2_splat:
                     {
-                        wasm_set_exception(module, "unsupported SIMD opcode");
+                        SIMD_SPLAT_OP_F64(simde_wasm_f64x2_splat);
                         break;
                     }
 
+                    // TODO:
                     /* Lane */
                     case SIMD_i8x16_extract_lane_s:
                     case SIMD_i8x16_extract_lane_u:
@@ -5812,89 +6015,238 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         break;
                     }
 
+#define SIMD_DOUBLE_OP(simde_func) \
+    do { \
+        V128 v2 = POP_V128(); /* popped first: right-hand operand */ \
+        V128 v1 = POP_V128(); /* popped second: left-hand operand */ \
+        addr_ret = GET_OFFSET(); \
+        \
+        simde_v128_t simde_result = simde_func(SIMD_V128_TO_SIMDE_V128(v1), \
+                                               SIMD_V128_TO_SIMDE_V128(v2)); \
+        \
+        V128 result; \
+        SIMDE_V128_TO_SIMD_V128(simde_result, result); \
+        \
+        PUT_V128_TO_ADDR(frame_lp + addr_ret, result); \
+    } while (0)
+
                     /* i8x16 comparison operations */
                     case SIMD_i8x16_eq:
                     {
-                        V128 v1 = POP_V128();
-                        V128 v2 = POP_V128();
-                        int i;
-                        addr_ret = GET_OFFSET();
-
-                        V128 result;
-                        for (i = 0; i < 16; i++) {
-                            result.i8x16[i] =
-                                v1.i8x16[i] == v2.i8x16[i] ? 0xff : 0;
-                        }
-                        PUT_V128_TO_ADDR(frame_lp + addr_ret, result);
+                        SIMD_DOUBLE_OP(simde_wasm_i8x16_eq);
                         break;
                     }
                     case SIMD_i8x16_ne:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i8x16_ne);
+                        break;
+                    }
                     case SIMD_i8x16_lt_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i8x16_lt);
+                        break;
+                    }
                     case SIMD_i8x16_lt_u:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_u8x16_lt);
+                        break;
+                    }
                     case SIMD_i8x16_gt_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i8x16_gt);
+                        break;
+                    }
                     case SIMD_i8x16_gt_u:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_u8x16_gt);
+                        break;
+                    }
                     case SIMD_i8x16_le_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i8x16_le);
+                        break;
+                    }
                     case SIMD_i8x16_le_u:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_u8x16_le);
+                        break;
+                    }
                     case SIMD_i8x16_ge_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i8x16_ge);
+                        break;
+                    }
                     case SIMD_i8x16_ge_u:
                     {
-                        wasm_set_exception(module, "unsupported SIMD opcode");
+                        SIMD_DOUBLE_OP(simde_wasm_u8x16_ge);
                         break;
                     }
 
                     /* i16x8 comparison operations */
                     case SIMD_i16x8_eq:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i16x8_eq);
+                        break;
+                    }
                     case SIMD_i16x8_ne:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i16x8_ne);
+                        break;
+                    }
                     case SIMD_i16x8_lt_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i16x8_lt);
+                        break;
+                    }
                     case SIMD_i16x8_lt_u:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_u16x8_lt);
+                        break;
+                    }
                     case SIMD_i16x8_gt_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i16x8_gt);
+                        break;
+                    }
                     case SIMD_i16x8_gt_u:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_u16x8_gt);
+                        break;
+                    }
                     case SIMD_i16x8_le_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i16x8_le);
+                        break;
+                    }
                     case SIMD_i16x8_le_u:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_u16x8_le);
+                        break;
+                    }
                     case SIMD_i16x8_ge_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i16x8_ge);
+                        break;
+                    }
                     case SIMD_i16x8_ge_u:
                     {
-                        wasm_set_exception(module, "unsupported SIMD opcode");
+                        SIMD_DOUBLE_OP(simde_wasm_u16x8_ge);
                         break;
                     }
 
                     /* i32x4 comparison operations */
                     case SIMD_i32x4_eq:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i32x4_eq);
+                        break;
+                    }
                     case SIMD_i32x4_ne:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i32x4_ne);
+                        break;
+                    }
                     case SIMD_i32x4_lt_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i32x4_lt);
+                        break;
+                    }
                     case 
SIMD_i32x4_lt_u:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_u32x4_lt);
+                        break;
+                    }
                     case SIMD_i32x4_gt_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i32x4_gt);
+                        break;
+                    }
                     case SIMD_i32x4_gt_u:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_u32x4_gt);
+                        break;
+                    }
                     case SIMD_i32x4_le_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i32x4_le);
+                        break;
+                    }
                     case SIMD_i32x4_le_u:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_u32x4_le);
+                        break;
+                    }
                     case SIMD_i32x4_ge_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i32x4_ge);
+                        break;
+                    }
                     case SIMD_i32x4_ge_u:
                     {
-                        wasm_set_exception(module, "unsupported SIMD opcode");
+                        SIMD_DOUBLE_OP(simde_wasm_u32x4_ge);
                         break;
                     }
 
                     /* f32x4 comparison operations */
                     case SIMD_f32x4_eq:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_f32x4_eq);
+                        break;
+                    }
                     case SIMD_f32x4_ne:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_f32x4_ne);
+                        break;
+                    }
                     case SIMD_f32x4_lt:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_f32x4_lt);
+                        break;
+                    }
                     case SIMD_f32x4_gt:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_f32x4_gt);
+                        break;
+                    }
                     case SIMD_f32x4_le:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_f32x4_le);
+                        break;
+                    }
                     case SIMD_f32x4_ge:
                     {
-                        wasm_set_exception(module, "unsupported SIMD opcode");
+                        SIMD_DOUBLE_OP(simde_wasm_f32x4_ge);
                         break;
                     }
 
                     /* f64x2 comparison operations */
                     case SIMD_f64x2_eq:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_f64x2_eq);
+                        break;
+                    }
                     case SIMD_f64x2_ne:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_f64x2_ne);
+                        break;
+                    }
                     case SIMD_f64x2_lt:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_f64x2_lt);
+                        break;
+                    }
                     case SIMD_f64x2_gt:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_f64x2_gt);
+                        break;
+                    }
                     case SIMD_f64x2_le:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_f64x2_le);
+                        break;
+                    }
                     case SIMD_f64x2_ge:
                     {
-                        wasm_set_exception(module, "unsupported SIMD opcode");
+                        SIMD_DOUBLE_OP(simde_wasm_f64x2_ge);
                         break;
                     }
 
@@ -5948,10 +6300,23 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                                          v1.i64x2[1] ^ v2.i64x2[1]);
                         break;
                     }
+                    // TODO: Test
                     case SIMD_v128_bitselect:
                     {
-                        wasm_set_exception(module, "unsupported SIMD opcode");
-                        break;
+                        V128 v3 = POP_V128(); /* popped first: the mask */
+                        V128 v2 = POP_V128();
+                        V128 v1 = POP_V128();
+                        addr_ret = GET_OFFSET();
+
+                        simde_v128_t simde_result = simde_wasm_v128_bitselect(
+                            SIMD_V128_TO_SIMDE_V128(v1),
+                            SIMD_V128_TO_SIMDE_V128(v2),
+                            SIMD_V128_TO_SIMDE_V128(v3));
+
+                        V128 result;
+                        SIMDE_V128_TO_SIMD_V128(simde_result, result);
+                        PUT_V128_TO_ADDR(frame_lp + addr_ret, result);
+                        break;
                     }
                     case SIMD_v128_any_true:
                     {
@@ -5962,6 +6327,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         break;
                     }
 
+                    // TODO:
                     /* load lane operations */
                     case SIMD_v128_load8_lane:
                     case SIMD_v128_load16_lane:
@@ -5978,209 +6344,808 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module,
                         break;
                     }
 
+#define SIMD_SINGLE_OP(simde_func) \
+    do { \
+        V128 v1 = POP_V128(); \
+        addr_ret = GET_OFFSET(); \
+        \
+        simde_v128_t simde_result = simde_func(SIMD_V128_TO_SIMDE_V128(v1)); \
+        \
+        V128 result; \
+        SIMDE_V128_TO_SIMD_V128(simde_result, result); \
+        \
+        PUT_V128_TO_ADDR(frame_lp + addr_ret, result); \
+    } while (0)
+
                     /* Float conversion */
                     case SIMD_f32x4_demote_f64x2_zero:
+                    {
+                        SIMD_SINGLE_OP(simde_wasm_f32x4_demote_f64x2_zero);
+                        break;
+                    }
                     case SIMD_f64x2_promote_low_f32x4_zero:
                     {
-                        wasm_set_exception(module, "unsupported SIMD opcode");
+                        SIMD_SINGLE_OP(simde_wasm_f64x2_promote_low_f32x4);
                         break;
                     }
 
                     /* i8x16 operations */
                     case SIMD_i8x16_abs:
+                    {
+                        SIMD_SINGLE_OP(simde_wasm_i8x16_abs);
+                        break;
+                    }
                     case SIMD_i8x16_neg:
+                    {
+                        SIMD_SINGLE_OP(simde_wasm_i8x16_neg);
+                        break;
+                    }
                     case SIMD_i8x16_popcnt:
+                    {
+                        SIMD_SINGLE_OP(simde_wasm_i8x16_popcnt);
+                        break;
+                    }
                     case SIMD_i8x16_all_true:
                     {
-                        V128 v = POP_V128();
-                        uint8_t *bytes = (uint8_t *)&v;
-                        bool all_true = true;
+                        V128 v1 = POP_V128();
 
-                        for (int i = 0; i < 16; i++) {
-                            if (bytes[i] == 0) {
-                                all_true = false;
-                                break;
-                            }
-                        }
+                        bool result = simde_wasm_i8x16_all_true(
+                            SIMD_V128_TO_SIMDE_V128(v1));
 
-                        PUSH_I32(all_true ? 
1 : 0);
+                        addr_ret = GET_OFFSET();
+                        frame_lp[addr_ret] = result;
                         break;
                     }
                     case SIMD_i8x16_bitmask:
+                    {
+                        V128 v1 = POP_V128();
+
+                        uint32_t result = simde_wasm_i8x16_bitmask(
+                            SIMD_V128_TO_SIMDE_V128(v1));
+
+                        addr_ret = GET_OFFSET();
+                        frame_lp[addr_ret] = result;
+                        break;
+                    }
                     case SIMD_i8x16_narrow_i16x8_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i8x16_narrow_i16x8);
+                        break;
+                    }
                     case SIMD_i8x16_narrow_i16x8_u:
-                    case SIMD_f32x4_ceil:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_u8x16_narrow_i16x8);
+                        break;
+                    }
+                    case SIMD_f32x4_ceil:
+                    {
+                        SIMD_SINGLE_OP(simde_wasm_f32x4_ceil);
+                        break;
+                    }
                     case SIMD_f32x4_floor:
+                    {
+                        SIMD_SINGLE_OP(simde_wasm_f32x4_floor);
+                        break;
+                    }
                     case SIMD_f32x4_trunc:
+                    {
+                        SIMD_SINGLE_OP(simde_wasm_f32x4_trunc);
+                        break;
+                    }
                     case SIMD_f32x4_nearest:
+                    {
+                        SIMD_SINGLE_OP(simde_wasm_f32x4_nearest);
+                        break;
+                    }
+// TODO: Check count?
+#define SIMD_LANE_SHIFT(simde_func) \
+    do { \
+        int32 count = POP_I32(); \
+        V128 v1 = POP_V128(); \
+        addr_ret = GET_OFFSET(); \
+        \
+        simde_v128_t simde_result = \
+            simde_func(SIMD_V128_TO_SIMDE_V128(v1), count); \
+        \
+        V128 result; \
+        SIMDE_V128_TO_SIMD_V128(simde_result, result); \
+        \
+        PUT_V128_TO_ADDR(frame_lp + addr_ret, result); \
+    } while (0)
                     case SIMD_i8x16_shl:
+                    {
+                        SIMD_LANE_SHIFT(simde_wasm_i8x16_shl);
+                        break;
+                    }
                     case SIMD_i8x16_shr_s:
+                    {
+                        SIMD_LANE_SHIFT(simde_wasm_i8x16_shr);
+                        break;
+                    }
                     case SIMD_i8x16_shr_u:
+                    {
+                        SIMD_LANE_SHIFT(simde_wasm_u8x16_shr);
+                        break;
+                    }
                     case SIMD_i8x16_add:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i8x16_add);
+                        break;
+                    }
                     case SIMD_i8x16_add_sat_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i8x16_add_sat);
+                        break;
+                    }
                     case SIMD_i8x16_add_sat_u:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_u8x16_add_sat);
+                        break;
+                    }
                     case SIMD_i8x16_sub:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i8x16_sub);
+                        break;
+                    }
                     case SIMD_i8x16_sub_sat_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i8x16_sub_sat);
+                        break;
+                    }
                     case SIMD_i8x16_sub_sat_u:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_u8x16_sub_sat);
+                        break;
+                    }
                     case SIMD_f64x2_ceil:
+                    {
+                        SIMD_SINGLE_OP(simde_wasm_f64x2_ceil);
+                        break;
+                    }
                     case SIMD_f64x2_floor:
+                    {
+                        SIMD_SINGLE_OP(simde_wasm_f64x2_floor);
+                        break;
+                    }
                     case SIMD_i8x16_min_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i8x16_min);
+                        break;
+                    }
                     case SIMD_i8x16_min_u:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_u8x16_min);
+                        break;
+                    }
                     case SIMD_i8x16_max_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i8x16_max);
+                        break;
+                    }
                     case SIMD_i8x16_max_u:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_u8x16_max);
+                        break;
+                    }
                     case SIMD_f64x2_trunc:
+                    {
+                        SIMD_SINGLE_OP(simde_wasm_f64x2_trunc);
+                        break;
+                    }
                     case SIMD_i8x16_avgr_u:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_u8x16_avgr);
+                        break;
+                    }
                     case SIMD_i16x8_extadd_pairwise_i8x16_s:
+                    {
+                        SIMD_SINGLE_OP(simde_wasm_i16x8_extadd_pairwise_i8x16);
+                        break;
+                    }
                     case SIMD_i16x8_extadd_pairwise_i8x16_u:
+                    {
+                        SIMD_SINGLE_OP(simde_wasm_u16x8_extadd_pairwise_u8x16);
+                        break;
+                    }
                     case SIMD_i32x4_extadd_pairwise_i16x8_s:
+                    {
+                        SIMD_SINGLE_OP(simde_wasm_i32x4_extadd_pairwise_i16x8);
+                        break;
+                    }
                     case SIMD_i32x4_extadd_pairwise_i16x8_u:
                     {
-                        wasm_set_exception(module, "unsupported SIMD opcode");
+                        SIMD_SINGLE_OP(simde_wasm_u32x4_extadd_pairwise_u16x8);
                         break;
                     }
 
                     /* i16x8 operations */
                     case SIMD_i16x8_abs:
+                    {
+                        SIMD_SINGLE_OP(simde_wasm_i16x8_abs);
+                        break;
+                    }
                     case SIMD_i16x8_neg:
+                    {
+                        SIMD_SINGLE_OP(simde_wasm_i16x8_neg);
+                        break;
+                    }
                     case SIMD_i16x8_q15mulr_sat_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i16x8_q15mulr_sat);
+                        break;
+                    }
                     case SIMD_i16x8_all_true:
+                    {
+                        V128 v1 = POP_V128();
+
+                        bool result = simde_wasm_i16x8_all_true(
+                            SIMD_V128_TO_SIMDE_V128(v1));
+
+                        addr_ret = GET_OFFSET();
+                        frame_lp[addr_ret] = result;
+                        break;
+                    }
                     case SIMD_i16x8_bitmask:
+                    {
+                        V128 v1 = POP_V128();
+
+                        uint32_t result = simde_wasm_i16x8_bitmask(
+                            SIMD_V128_TO_SIMDE_V128(v1));
+
+                        addr_ret = GET_OFFSET();
+                        frame_lp[addr_ret] = result;
+                        break;
+                    }
                     case SIMD_i16x8_narrow_i32x4_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i16x8_narrow_i32x4);
+                        break;
+                    }
                     case SIMD_i16x8_narrow_i32x4_u:
+                    {
+                        
SIMD_DOUBLE_OP(simde_wasm_u16x8_narrow_i32x4);
+                        break;
+                    }
                     case SIMD_i16x8_extend_low_i8x16_s:
+                    {
+                        SIMD_SINGLE_OP(simde_wasm_i16x8_extend_low_i8x16);
+                        break;
+                    }
                     case SIMD_i16x8_extend_high_i8x16_s:
+                    {
+                        SIMD_SINGLE_OP(simde_wasm_i16x8_extend_high_i8x16);
+                        break;
+                    }
                     case SIMD_i16x8_extend_low_i8x16_u:
+                    {
+                        SIMD_SINGLE_OP(simde_wasm_u16x8_extend_low_u8x16);
+                        break;
+                    }
                     case SIMD_i16x8_extend_high_i8x16_u:
+                    {
+                        SIMD_SINGLE_OP(simde_wasm_u16x8_extend_high_u8x16);
+                        break;
+                    }
                     case SIMD_i16x8_shl:
+                    {
+                        SIMD_LANE_SHIFT(simde_wasm_i16x8_shl);
+                        break;
+                    }
                     case SIMD_i16x8_shr_s:
+                    {
+                        SIMD_LANE_SHIFT(simde_wasm_i16x8_shr);
+                        break;
+                    }
                     case SIMD_i16x8_shr_u:
+                    {
+                        SIMD_LANE_SHIFT(simde_wasm_u16x8_shr);
+                        break;
+                    }
                     case SIMD_i16x8_add:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i16x8_add);
+                        break;
+                    }
                     case SIMD_i16x8_add_sat_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i16x8_add_sat);
+                        break;
+                    }
                     case SIMD_i16x8_add_sat_u:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_u16x8_add_sat);
+                        break;
+                    }
                     case SIMD_i16x8_sub:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i16x8_sub);
+                        break;
+                    }
                     case SIMD_i16x8_sub_sat_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i16x8_sub_sat);
+                        break;
+                    }
                     case SIMD_i16x8_sub_sat_u:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_u16x8_sub_sat);
+                        break;
+                    }
                     case SIMD_f64x2_nearest:
+                    {
+                        SIMD_SINGLE_OP(simde_wasm_f64x2_nearest);
+                        break;
+                    }
                     case SIMD_i16x8_mul:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i16x8_mul);
+                        break;
+                    }
                     case SIMD_i16x8_min_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i16x8_min);
+                        break;
+                    }
                     case SIMD_i16x8_min_u:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_u16x8_min);
+                        break;
+                    }
                     case SIMD_i16x8_max_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i16x8_max);
+                        break;
+                    }
                     case SIMD_i16x8_max_u:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_u16x8_max);
+                        break;
+                    }
                     case SIMD_i16x8_avgr_u:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_u16x8_avgr);
+                        break;
+                    }
                     case SIMD_i16x8_extmul_low_i8x16_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i16x8_extmul_low_i8x16);
+                        break;
+                    }
                     case SIMD_i16x8_extmul_high_i8x16_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i16x8_extmul_high_i8x16);
+                        break;
+                    }
                     case SIMD_i16x8_extmul_low_i8x16_u:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_u16x8_extmul_low_u8x16);
+                        break;
+                    }
                     case SIMD_i16x8_extmul_high_i8x16_u:
                     {
-                        wasm_set_exception(module, "unsupported SIMD opcode");
+                        SIMD_DOUBLE_OP(simde_wasm_u16x8_extmul_high_u8x16);
                         break;
                     }
 
                     /* i32x4 operations */
                     case SIMD_i32x4_abs:
+                    {
+                        SIMD_SINGLE_OP(simde_wasm_i32x4_abs);
+                        break;
+                    }
                     case SIMD_i32x4_neg:
+                    {
+                        SIMD_SINGLE_OP(simde_wasm_i32x4_neg);
+                        break;
+                    }
                     case SIMD_i32x4_all_true:
+                    {
+                        V128 v1 = POP_V128();
+
+                        bool result = simde_wasm_i32x4_all_true(
+                            SIMD_V128_TO_SIMDE_V128(v1));
+
+                        addr_ret = GET_OFFSET();
+                        frame_lp[addr_ret] = result;
+                        break;
+                    }
                     case SIMD_i32x4_bitmask:
+                    {
+                        V128 v1 = POP_V128();
+
+                        uint32_t result = simde_wasm_i32x4_bitmask(
+                            SIMD_V128_TO_SIMDE_V128(v1));
+
+                        addr_ret = GET_OFFSET();
+                        frame_lp[addr_ret] = result;
+                        break;
+                    }
                     case SIMD_i32x4_extend_low_i16x8_s:
+                    {
+                        SIMD_SINGLE_OP(simde_wasm_i32x4_extend_low_i16x8);
+                        break;
+                    }
                     case SIMD_i32x4_extend_high_i16x8_s:
+                    {
+                        SIMD_SINGLE_OP(simde_wasm_i32x4_extend_high_i16x8);
+                        break;
+                    }
                     case SIMD_i32x4_extend_low_i16x8_u:
+                    {
+                        SIMD_SINGLE_OP(simde_wasm_u32x4_extend_low_u16x8);
+                        break;
+                    }
                     case SIMD_i32x4_extend_high_i16x8_u:
+                    {
+                        SIMD_SINGLE_OP(simde_wasm_u32x4_extend_high_u16x8);
+                        break;
+                    }
                     case SIMD_i32x4_shl:
+                    {
+                        SIMD_LANE_SHIFT(simde_wasm_i32x4_shl);
+                        break;
+                    }
                     case SIMD_i32x4_shr_s:
+                    {
+                        SIMD_LANE_SHIFT(simde_wasm_i32x4_shr);
+                        break;
+                    }
                     case SIMD_i32x4_shr_u:
+                    {
+                        SIMD_LANE_SHIFT(simde_wasm_u32x4_shr);
+                        break;
+                    }
                     case SIMD_i32x4_add:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i32x4_add);
+                        break;
+                    }
                     case SIMD_i32x4_sub:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i32x4_sub);
+                        break;
+                    }
                     case SIMD_i32x4_mul:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i32x4_mul);
+                        break;
+                    }
                     case SIMD_i32x4_min_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i32x4_min);
+                        break;
+                    }
                     case SIMD_i32x4_min_u:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_u32x4_min);
+                        break;
+                    }
                     case 
SIMD_i32x4_max_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i32x4_max);
+                        break;
+                    }
                     case SIMD_i32x4_max_u:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_u32x4_max);
+                        break;
+                    }
                     case SIMD_i32x4_dot_i16x8_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i32x4_dot_i16x8);
+                        break;
+                    }
                     case SIMD_i32x4_extmul_low_i16x8_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i32x4_extmul_low_i16x8);
+                        break;
+                    }
                     case SIMD_i32x4_extmul_high_i16x8_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i32x4_extmul_high_i16x8);
+                        break;
+                    }
                     case SIMD_i32x4_extmul_low_i16x8_u:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_u32x4_extmul_low_u16x8);
+                        break;
+                    }
                     case SIMD_i32x4_extmul_high_i16x8_u:
                     {
-                        wasm_set_exception(module, "unsupported SIMD opcode");
+                        SIMD_DOUBLE_OP(simde_wasm_u32x4_extmul_high_u16x8);
                         break;
                     }
 
                     /* i64x2 operations */
                     case SIMD_i64x2_abs:
+                    {
+                        SIMD_SINGLE_OP(simde_wasm_i64x2_abs);
+                        break;
+                    }
                     case SIMD_i64x2_neg:
+                    {
+                        SIMD_SINGLE_OP(simde_wasm_i64x2_neg);
+                        break;
+                    }
                     case SIMD_i64x2_all_true:
+                    {
+                        V128 v1 = POP_V128();
+
+                        bool result = simde_wasm_i64x2_all_true(
+                            SIMD_V128_TO_SIMDE_V128(v1));
+
+                        addr_ret = GET_OFFSET();
+                        frame_lp[addr_ret] = result;
+                        break;
+                    }
                     case SIMD_i64x2_bitmask:
+                    {
+                        V128 v1 = POP_V128();
+
+                        uint32_t result = simde_wasm_i64x2_bitmask(
+                            SIMD_V128_TO_SIMDE_V128(v1));
+
+                        addr_ret = GET_OFFSET();
+                        frame_lp[addr_ret] = result;
+                        break;
+                    }
                     case SIMD_i64x2_extend_low_i32x4_s:
+                    {
+                        SIMD_SINGLE_OP(simde_wasm_i64x2_extend_low_i32x4);
+                        break;
+                    }
                     case SIMD_i64x2_extend_high_i32x4_s:
+                    {
+                        SIMD_SINGLE_OP(simde_wasm_i64x2_extend_high_i32x4);
+                        break;
+                    }
                     case SIMD_i64x2_extend_low_i32x4_u:
+                    {
+                        SIMD_SINGLE_OP(simde_wasm_u64x2_extend_low_u32x4);
+                        break;
+                    }
                     case SIMD_i64x2_extend_high_i32x4_u:
+                    {
+                        SIMD_SINGLE_OP(simde_wasm_u64x2_extend_high_u32x4);
+                        break;
+                    }
+
+                    // TODO: Verify count works
                     case SIMD_i64x2_shl:
+                    {
+                        SIMD_LANE_SHIFT(simde_wasm_i64x2_shl);
+                        break;
+                    }
                     case SIMD_i64x2_shr_s:
+                    {
+                        SIMD_LANE_SHIFT(simde_wasm_i64x2_shr);
+                        break;
+                    }
                     case SIMD_i64x2_shr_u:
+                    {
+                        SIMD_LANE_SHIFT(simde_wasm_u64x2_shr);
+                        break;
+                    }
                     case SIMD_i64x2_add:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i64x2_add);
+                        break;
+                    }
                     case SIMD_i64x2_sub:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i64x2_sub);
+                        break;
+                    }
                     case SIMD_i64x2_mul:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i64x2_mul);
+                        break;
+                    }
                     case SIMD_i64x2_eq:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i64x2_eq);
+                        break;
+                    }
                     case SIMD_i64x2_ne:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i64x2_ne);
+                        break;
+                    }
                     case SIMD_i64x2_lt_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i64x2_lt);
+                        break;
+                    }
                     case SIMD_i64x2_gt_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i64x2_gt);
+                        break;
+                    }
                     case SIMD_i64x2_le_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i64x2_le);
+                        break;
+                    }
                     case SIMD_i64x2_ge_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i64x2_ge);
+                        break;
+                    }
                     case SIMD_i64x2_extmul_low_i32x4_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i64x2_extmul_low_i32x4);
+                        break;
+                    }
                     case SIMD_i64x2_extmul_high_i32x4_s:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_i64x2_extmul_high_i32x4);
+                        break;
+                    }
                     case SIMD_i64x2_extmul_low_i32x4_u:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_u64x2_extmul_low_u32x4);
+                        break;
+                    }
                     case SIMD_i64x2_extmul_high_i32x4_u:
                     {
-                        wasm_set_exception(module, "unsupported SIMD opcode");
+                        SIMD_DOUBLE_OP(simde_wasm_u64x2_extmul_high_u32x4);
                         break;
                     }
 
                     /* f32x4 opertions */
                     case SIMD_f32x4_abs:
+                    {
+                        SIMD_SINGLE_OP(simde_wasm_f32x4_abs);
+                        break;
+                    }
                     case SIMD_f32x4_neg:
+                    {
+                        SIMD_SINGLE_OP(simde_wasm_f32x4_neg);
+                        break;
+                    }
                     case SIMD_f32x4_sqrt:
+                    {
+                        SIMD_SINGLE_OP(simde_wasm_f32x4_sqrt);
+                        break;
+                    }
                     case SIMD_f32x4_add:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_f32x4_add);
+                        break;
+                    }
                     case SIMD_f32x4_sub:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_f32x4_sub);
+                        break;
+                    }
                     case SIMD_f32x4_mul:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_f32x4_mul);
+                        break;
+                    }
                     case SIMD_f32x4_div:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_f32x4_div);
+                        break;
+                    }
                     case SIMD_f32x4_min:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_f32x4_min);
+                        break;
+                    }
                     case SIMD_f32x4_max:
+                    {
+                        SIMD_DOUBLE_OP(simde_wasm_f32x4_max);
+                        break;
+                    }
                     case SIMD_f32x4_pmin:
+                    {
+                        
SIMD_DOUBLE_OP(simde_wasm_f32x4_pmin); + break; + } case SIMD_f32x4_pmax: { - wasm_set_exception(module, "unsupported SIMD opcode"); + SIMD_DOUBLE_OP(simde_wasm_f32x4_pmax); break; } /* f64x2 operations */ case SIMD_f64x2_abs: + { + SIMD_SINGLE_OP(simde_wasm_f64x2_abs); + break; + } case SIMD_f64x2_neg: + { + SIMD_SINGLE_OP(simde_wasm_f64x2_neg); + break; + } case SIMD_f64x2_sqrt: + { + SIMD_SINGLE_OP(simde_wasm_f64x2_sqrt); + break; + } case SIMD_f64x2_add: + { + SIMD_DOUBLE_OP(simde_wasm_f64x2_add); + break; + } case SIMD_f64x2_sub: + { + SIMD_DOUBLE_OP(simde_wasm_f64x2_sub); + break; + } case SIMD_f64x2_mul: + { + SIMD_DOUBLE_OP(simde_wasm_f64x2_mul); + break; + } case SIMD_f64x2_div: + { + SIMD_DOUBLE_OP(simde_wasm_f64x2_div); + break; + } case SIMD_f64x2_min: + { + SIMD_DOUBLE_OP(simde_wasm_f64x2_min); + break; + } case SIMD_f64x2_max: + { + SIMD_DOUBLE_OP(simde_wasm_f64x2_max); + break; + } case SIMD_f64x2_pmin: + { + SIMD_DOUBLE_OP(simde_wasm_f64x2_pmin); + break; + } case SIMD_f64x2_pmax: { - wasm_set_exception(module, "unsupported SIMD opcode"); + SIMD_DOUBLE_OP(simde_wasm_f64x2_pmax); break; } /* Conversion operations */ case SIMD_i32x4_trunc_sat_f32x4_s: + { + SIMD_SINGLE_OP(simde_wasm_i32x4_trunc_sat_f32x4); + break; + } case SIMD_i32x4_trunc_sat_f32x4_u: + { + SIMD_SINGLE_OP(simde_wasm_i32x4_trunc_sat_f32x4); + break; + } case SIMD_f32x4_convert_i32x4_s: + { + SIMD_SINGLE_OP(simde_wasm_f32x4_convert_i32x4); + break; + } case SIMD_f32x4_convert_i32x4_u: + { + SIMD_SINGLE_OP(simde_wasm_f32x4_convert_i32x4); + break; + } case SIMD_i32x4_trunc_sat_f64x2_s_zero: + { + SIMD_SINGLE_OP(simde_wasm_i32x4_trunc_sat_f64x2_zero); + break; + } case SIMD_i32x4_trunc_sat_f64x2_u_zero: + { + SIMD_SINGLE_OP(simde_wasm_i32x4_trunc_sat_f64x2_zero); + break; + } case SIMD_f64x2_convert_low_i32x4_s: + { + SIMD_SINGLE_OP(simde_wasm_f64x2_convert_low_i32x4); + break; + } case SIMD_f64x2_convert_low_i32x4_u: { - wasm_set_exception(module, "unsupported SIMD opcode"); + 
SIMD_SINGLE_OP(simde_wasm_f64x2_convert_low_u32x4); break; } @@ -6190,6 +7155,7 @@ wasm_interp_call_func_bytecode(WASMModuleInstance *module, HANDLE_OP_END(); } #endif + HANDLE_OP(WASM_OP_CALL) { #if WASM_ENABLE_THREAD_MGR != 0 diff --git a/core/iwasm/interpreter/wasm_loader.c b/core/iwasm/interpreter/wasm_loader.c index 47995e03f6..ae6a92fa04 100644 --- a/core/iwasm/interpreter/wasm_loader.c +++ b/core/iwasm/interpreter/wasm_loader.c @@ -15080,6 +15080,10 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, read_leb_mem_offset(p, p_end, mem_offset); /* offset */ +#if WASM_ENABLE_FAST_INTERP != 0 + emit_uint32(loader_ctx, mem_offset); +#endif + POP_AND_PUSH(mem_offset_type, VALUE_TYPE_V128); #if WASM_ENABLE_JIT != 0 || WASM_ENABLE_WAMR_COMPILER != 0 func->has_memory_operations = true; @@ -15099,6 +15103,10 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, read_leb_mem_offset(p, p_end, mem_offset); /* offset */ +#if WASM_ENABLE_FAST_INTERP != 0 + emit_uint32(loader_ctx, mem_offset); +#endif + POP_V128(); POP_MEM_OFFSET(); #if WASM_ENABLE_JIT != 0 || WASM_ENABLE_WAMR_COMPILER != 0 @@ -15128,12 +15136,17 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, CHECK_BUF1(p, p_end, 16); mask = read_i8x16(p, error_buf, error_buf_size); - p += 16; if (!check_simd_shuffle_mask(mask, error_buf, error_buf_size)) { goto fail; } - +#if WASM_ENABLE_FAST_INTERP != 0 + uint64 high, low; + wasm_runtime_read_v128(p, &high, &low); + emit_uint64(loader_ctx, high); + emit_uint64(loader_ctx, low); +#endif + p += 16; POP2_AND_PUSH(VALUE_TYPE_V128, VALUE_TYPE_V128); break; } @@ -15204,7 +15217,6 @@ wasm_loader_prepare_bytecode(WASMModule *module, WASMFunction *func, error_buf_size)) { goto fail; } - if (replace[opcode1 - SIMD_i8x16_extract_lane_s]) { if (!(wasm_loader_pop_frame_ref( loader_ctx, diff --git a/core/iwasm/interpreter/wasm_opcode.h b/core/iwasm/interpreter/wasm_opcode.h index 1424840e79..75d30c9b31 100644 --- 
a/core/iwasm/interpreter/wasm_opcode.h +++ b/core/iwasm/interpreter/wasm_opcode.h @@ -779,10 +779,10 @@ typedef enum WASMAtomicEXTOpcode { #else #define DEF_DEBUG_BREAK_HANDLE() #endif - #define SET_GOTO_TABLE_ELEM(opcode) [opcode] = HANDLE_OPCODE(opcode) -#if (WASM_ENABLE_JIT != 0 || WASM_ENABLE_FAST_INTERP != 0) \ +#if (WASM_ENABLE_JIT != 0 \ + || (WASM_ENABLE_FAST_INTERP != 0 && WASM_ENABLE_SIMDE != 0)) \ && WASM_ENABLE_SIMD != 0 #define SET_GOTO_TABLE_SIMD_PREFIX_ELEM() \ SET_GOTO_TABLE_ELEM(WASM_OP_SIMD_PREFIX), diff --git a/core/iwasm/libraries/simde/simde.cmake b/core/iwasm/libraries/simde/simde.cmake new file mode 100644 index 0000000000..b36e356945 --- /dev/null +++ b/core/iwasm/libraries/simde/simde.cmake @@ -0,0 +1,23 @@ +# Copyright (C) 2024 Amazon Inc. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# simde is a header only library + +set (LIB_SIMDE_DIR ${CMAKE_CURRENT_LIST_DIR}) + +if (WAMR_BUILD_TARGET MATCHES "AARCH64.*" OR WAMR_BUILD_TARGET MATCHES "ARM.*") + add_definitions (-DWASM_ENABLE_SIMDE=1) +endif () + +include_directories(${LIB_SIMDE_DIR} ${LIB_SIMDE_DIR}/simde) + +include(FetchContent) + +FetchContent_Declare( + simde + GIT_REPOSITORY https://github.com/simd-everywhere/simde + GIT_TAG v0.8.2 +) + +message("-- Fetching simde ..") +FetchContent_MakeAvailable(simde) +include_directories("${simde_SOURCE_DIR}")