Skip to content

Commit

Permalink
Add first NEON SIMD opcode implementations to fast interpreter (#3859)
Browse files Browse the repository at this point in the history
Add some implementations of SIMD opcodes using NEON instructions.
Tested using:
```wast
(module
  (import "wasi_snapshot_preview1" "proc_exit" (func $proc_exit (param i32)))
  (memory (export "memory") 1)

  ;; Trap unless every lane of the comparison mask passed in is set.
  ;; The *.eq instructions yield all-ones or all-zeros per lane, so testing
  ;; the 16 byte lanes covers every lane width used below. (any_true would
  ;; accept a result in which only a single lane matched.)
  (func $assert_true (param v128)
    local.get 0
    i8x16.all_true
    i32.eqz
    if
      unreachable
    end
  )
  (func $main (export "_start")
    ;; Grow memory by 32 pages; the returned previous size is not needed.
    i32.const 32
    memory.grow
    drop

    ;; v128.store / v128.load round trip at offset 0.
    i32.const 0
    v128.const i8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
    v128.store

    i32.const 0
    v128.load

    v128.const i8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
    i8x16.eq
    call $assert_true

    ;; v128.store / v128.load round trip at offset 16.
    i32.const 16
    v128.const i8x16 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
    v128.store

    i32.const 16
    v128.load
    v128.const i8x16 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
    i8x16.eq
    call $assert_true

    ;; The vector at offset 0 must be untouched by the store at offset 16.
    i32.const 0
    v128.load
    v128.const i8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
    i8x16.eq
    call $assert_true

    ;; Grow memory by one more page; the result is not needed.
    i32.const 1
    memory.grow
    drop

    ;; Linear memory is little-endian: the least significant byte of the
    ;; stored i64 lands at offset 0 and therefore becomes lane 0 of the
    ;; extending loads below.

    ;; Bytes in memory order: 80 FE 02 7E 01 FF 80 7F
    i32.const 0
    i64.const 0x7F80FF017E02FE80
    i64.store

    i32.const 0
    v128.load8x8_s

    v128.const i16x8 -128 -2 2 126 1 -1 -128 127

    i16x8.eq
    call $assert_true

    ;; Bytes in memory order: 7F 80 FF 01 7E 02 FE 80
    i32.const 0
    i64.const 0x80FE027E01FF807F
    i64.store

    i32.const 0
    v128.load8x8_u

    v128.const i16x8 127 128 255 1 126 2 254 128

    i16x8.eq
    call $assert_true

    ;; 16-bit values in memory order: 0001 7FFF FFFE 8000
    i32.const 0
    i64.const 0x8000FFFE7FFF0001
    i64.store

    i32.const 0
    v128.load16x4_s

    v128.const i32x4 1 32767 -2 -32768

    i32x4.eq
    call $assert_true

    i32.const 0
    i64.const 0x8000FFFE7FFF0001
    i64.store

    i32.const 0
    v128.load16x4_u

    v128.const i32x4 1 32767 65534 32768

    i32x4.eq
    call $assert_true

    ;; 32-bit values in memory order: 00000001 80000000
    i32.const 0
    i64.const 0x8000000000000001
    i64.store

    i32.const 0
    v128.load32x2_s

    v128.const i64x2 1 -2147483648

    i64x2.eq
    call $assert_true

    i32.const 0
    i64.const 0x8000000000000001
    i64.store

    i32.const 0
    v128.load32x2_u

    v128.const i64x2 1 2147483648

    i64x2.eq
    call $assert_true

    ;; All checks passed: exit with status 0.
    i32.const 0
    call $proc_exit
  )
)
```
  • Loading branch information
jammar1 authored Nov 5, 2024
1 parent aceaed6 commit c930c4d
Show file tree
Hide file tree
Showing 8 changed files with 1,128 additions and 47 deletions.
3 changes: 3 additions & 0 deletions build-scripts/config_common.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,9 @@ endif ()
# Report in the configuration summary that lib-rats is enabled.
if (WAMR_BUILD_LIB_RATS EQUAL 1)
message (" Lib rats enabled")
endif()
# Report in the configuration summary that the SIMDe library is enabled.
if (WAMR_BUILD_LIB_SIMDE EQUAL 1)
  message (" Lib simde enabled")
endif()
if (WAMR_BUILD_MINI_LOADER EQUAL 1)
add_definitions (-DWASM_ENABLE_MINI_LOADER=1)
message (" WASM mini loader enabled")
Expand Down
4 changes: 4 additions & 0 deletions build-scripts/runtime_lib.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,10 @@ if (WAMR_BUILD_LIB_RATS EQUAL 1)
include (${IWASM_DIR}/libraries/lib-rats/lib_rats.cmake)
endif ()

# Pull in the SIMDe library build script when WAMR_BUILD_LIB_SIMDE is set to 1.
if (WAMR_BUILD_LIB_SIMDE EQUAL 1)
include (${IWASM_DIR}/libraries/simde/simde.cmake)
endif ()

# Pull in the BoringSSL build script when WAMR_BUILD_WASM_CACHE is set to 1.
if (WAMR_BUILD_WASM_CACHE EQUAL 1)
include (${WAMR_ROOT_DIR}/build-scripts/involve_boringssl.cmake)
endif ()
Expand Down
6 changes: 6 additions & 0 deletions core/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,12 @@
#define WASM_ENABLE_SIMD 0
#endif

/* SIMDe support (used in the fast interpreter for SIMD opcodes) is
   disabled by default; a definition of WASM_ENABLE_SIMDE made before
   this point (e.g. by the build system) takes precedence */
#ifndef WASM_ENABLE_SIMDE
#define WASM_ENABLE_SIMDE 0
#endif

/* GC performance profiling */
#ifndef WASM_ENABLE_GC_PERF_PROFILING
#define WASM_ENABLE_GC_PERF_PROFILING 0
Expand Down
67 changes: 67 additions & 0 deletions core/iwasm/common/wasm_runtime_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,13 +73,20 @@ STORE_U8(void *addr, uint8_t value)
*(uint8 *)addr = value;
}

/* Write a 128-bit vector to addr with one direct V128-typed store;
   this variant performs no alignment handling of its own. */
static inline void
STORE_V128(void *addr, V128 value)
{
    V128 *dst = (V128 *)addr;

    dst[0] = value;
}

/* For LOAD opcodes */
/* Each macro reads a value of the named type by dereferencing addr
   through a cast to that type; the macros themselves do no alignment
   handling. */
#define LOAD_I64(addr) (*(int64 *)(addr))
#define LOAD_F64(addr) (*(float64 *)(addr))
#define LOAD_I32(addr) (*(int32 *)(addr))
#define LOAD_U32(addr) (*(uint32 *)(addr))
#define LOAD_I16(addr) (*(int16 *)(addr))
#define LOAD_U16(addr) (*(uint16 *)(addr))
/* 128-bit vector read */
#define LOAD_V128(addr) (*(V128 *)(addr))

#define STORE_PTR(addr, ptr) \
do { \
Expand Down Expand Up @@ -264,7 +271,67 @@ STORE_U16(void *addr, uint16_t value)
((uint8_t *)(addr))[0] = u.u8[0];
((uint8_t *)(addr))[1] = u.u8[1];
}

/**
 * Store a 128-bit vector to an address that may not be 16-byte aligned.
 *
 * The widest access width permitted by the alignment of addr is used:
 * one V128 store, two 64-bit stores, four 32-bit stores, eight 16-bit
 * stores, or sixteen byte stores.
 *
 * Addresses with less than 4-byte alignment are handled by the 16-bit
 * and byte-wise paths rather than asserted against.
 * NOTE(review): callers presumably pass arbitrary linear-memory
 * addresses (e.g. for v128.store) - confirm against the interpreter.
 *
 * @param addr destination address, any alignment
 * @param value vector to store
 */
static inline void
STORE_V128(void *addr, V128 value)
{
    uintptr_t addr_ = (uintptr_t)(addr);
    union {
        V128 val;
        uint64 u64[2];
        uint32 u32[4];
        uint16 u16[8];
        uint8 u8[16];
    } u;
    uint32 i;

    if ((addr_ & (uintptr_t)15) == 0) {
        *(V128 *)addr = value;
        return;
    }

    /* Split the vector into pieces no wider than the address alignment */
    u.val = value;
    if ((addr_ & (uintptr_t)7) == 0) {
        ((uint64 *)(addr))[0] = u.u64[0];
        ((uint64 *)(addr))[1] = u.u64[1];
    }
    else if ((addr_ & (uintptr_t)3) == 0) {
        ((uint32 *)addr)[0] = u.u32[0];
        ((uint32 *)addr)[1] = u.u32[1];
        ((uint32 *)addr)[2] = u.u32[2];
        ((uint32 *)addr)[3] = u.u32[3];
    }
    else if ((addr_ & (uintptr_t)1) == 0) {
        for (i = 0; i < 8; i++)
            ((uint16 *)addr)[i] = u.u16[i];
    }
    else {
        for (i = 0; i < 16; i++)
            ((uint8 *)addr)[i] = u.u8[i];
    }
}

/* For LOAD opcodes */
static inline V128
LOAD_V128(void *addr)
{
uintptr_t addr1 = (uintptr_t)addr;
union {
V128 val;
uint64 u64[2];
uint32 u32[4];
uint16 u16[8];
uint8 u8[16];
} u;
if ((addr1 & (uintptr_t)15) == 0)
return *(V128 *)addr;

if ((addr1 & (uintptr_t)7) == 0) {
u.u64[0] = ((uint64 *)addr)[0];
u.u64[1] = ((uint64 *)addr)[1];
}
else {
bh_assert((addr1 & (uintptr_t)3) == 0);
u.u32[0] = ((uint32 *)addr)[0];
u.u32[1] = ((uint32 *)addr)[1];
u.u32[2] = ((uint32 *)addr)[2];
u.u32[3] = ((uint32 *)addr)[3];
}
return u.val;
}

static inline int64
LOAD_I64(void *addr)
{
Expand Down
Loading

0 comments on commit c930c4d

Please sign in to comment.