From 5824cbae670b47ed92a99a83276a6e0a0ac31db4 Mon Sep 17 00:00:00 2001 From: Yasumasa Suenaga Date: Sat, 28 Dec 2024 18:03:03 +0900 Subject: [PATCH] Add MOVQ on SSE2 --- /* Review notes (commentary only; this text sits between the three-dash marker and the first "diff --git" header, so it is not part of any hunk). Purpose: add the SSE2 MOVQ forms that move a quadword between an XMM register and r/m64. AMD64AsmBuilder: emitREXOp(r, m) now delegates to a new emitREXOp(r, m, forceREXW) overload, which sets the REX.W bit (0b1000) when r.width() is 64 or forceREXW is true; in the visible part of the hunk the 16-bit branch only emits the 66H prefix and does not consult forceREXW. SSEAsmBuilder: movDorQ takes a new isQWORD flag and forwards it to emitREXOp; movDInternal and movQInternal call movDorQ with false and true respectively; movdRM and movdMR now go through movDInternal, and the new public movqRM and movqMR pass second opcodes 0x6e and 0x7e to movQInternal. SSEAsmTest: testMOVQ_A emits movqRM from the memory addressed by the first argument register into XMM0 and expects the stored double back as the return value; testMOVQ_B loads the destination address into RAX as an immediate, emits movqMR from XMM0 to that address, and reads the value back from the destination segment. */ .../ffmasm/amd64/AMD64AsmBuilder.java | 6 +- .../yasuenag/ffmasm/amd64/SSEAsmBuilder.java | 46 ++++++++++-- .../ffmasm/test/amd64/SSEAsmTest.java | 71 +++++++++++++++++++ 3 files changed, 118 insertions(+), 5 deletions(-) diff --git a/src/main/java/com/yasuenag/ffmasm/amd64/AMD64AsmBuilder.java b/src/main/java/com/yasuenag/ffmasm/amd64/AMD64AsmBuilder.java index 06f0866..dca8d70 100644 --- a/src/main/java/com/yasuenag/ffmasm/amd64/AMD64AsmBuilder.java +++ b/src/main/java/com/yasuenag/ffmasm/amd64/AMD64AsmBuilder.java @@ -256,12 +256,16 @@ else if(reg.width() == 64){ } protected void emitREXOp(Register r, Register m){ + emitREXOp(r, m, false); + } + + protected void emitREXOp(Register r, Register m, boolean forceREXW){ if(r.width() == 16){ // Ops for 16 bits operands (66H) byteBuf.put((byte)0x66); } else{ - byte rexw = (r.width() == 64) ? (byte)0b1000 : (byte)0; + byte rexw = ((r.width() == 64) || forceREXW) ? 
(byte)0b1000 : (byte)0; byte rexr = (byte)(((r.encoding() >> 3) << 2) & 0b0100); byte rexb = (byte)((m.encoding() >> 3) & 0b0001); byte rex = (byte)(rexw | rexr | rexb); diff --git a/src/main/java/com/yasuenag/ffmasm/amd64/SSEAsmBuilder.java b/src/main/java/com/yasuenag/ffmasm/amd64/SSEAsmBuilder.java index f963939..d683d47 100644 --- a/src/main/java/com/yasuenag/ffmasm/amd64/SSEAsmBuilder.java +++ b/src/main/java/com/yasuenag/ffmasm/amd64/SSEAsmBuilder.java @@ -121,9 +121,17 @@ public SSEAsmBuilder movdquMR(Register r, Register m, OptionalInt disp){ return movdq(r, m, disp, (byte)0xf3, (byte)0x7f); } - private SSEAsmBuilder movDorQ(Register r, Register m, OptionalInt disp, byte secondOpcode){ + private SSEAsmBuilder movDInternal(Register r, Register m, OptionalInt disp, byte secondOpcode){ + return movDorQ(r, m, disp, secondOpcode, false); + } + + private SSEAsmBuilder movQInternal(Register r, Register m, OptionalInt disp, byte secondOpcode){ + return movDorQ(r, m, disp, secondOpcode, true); + } + + private SSEAsmBuilder movDorQ(Register r, Register m, OptionalInt disp, byte secondOpcode, boolean isQWORD){ byteBuf.put((byte)0x66); // prefix - emitREXOp(r, m); + emitREXOp(r, m, isQWORD); byteBuf.put((byte)0x0f); // escape opcode byteBuf.put(secondOpcode); var mode = emitModRM(r, m, disp); @@ -144,7 +152,7 @@ private SSEAsmBuilder movDorQ(Register r, Register m, OptionalInt disp, byte sec * @return This instance */ public SSEAsmBuilder movdRM(Register r, Register m, OptionalInt disp){ - return movDorQ(r, m, disp, (byte)0x6e); + return movDInternal(r, m, disp, (byte)0x6e); } /** @@ -159,7 +167,37 @@ public SSEAsmBuilder movdRM(Register r, Register m, OptionalInt disp){ * @return This instance */ public SSEAsmBuilder movdMR(Register r, Register m, OptionalInt disp){ - return movDorQ(r, m, disp, (byte)0x7e); + return movDInternal(r, m, disp, (byte)0x7e); + } + + /** + * Move quadword from r/m64 to xmm. 
+ * Opcode: 66 REX.W 0F 6E /r + * Instruction: MOVQ xmm, r/m64 + * Op/En: A + * + * @param r "r" register + * @param m "r/m" register + * @param disp Displacement. Set "empty" if this operation is reg-reg. + * @return This instance + */ + public SSEAsmBuilder movqRM(Register r, Register m, OptionalInt disp){ + return movQInternal(r, m, disp, (byte)0x6e); + } + + /** + * Move quadword from xmm register to r/m64. + * Opcode: 66 REX.W 0F 7E /r + * Instruction: MOVQ r/m64, xmm + * Op/En: B + * + * @param r "r" register + * @param m "r/m" register + * @param disp Displacement. Set "empty" if this operation is reg-reg. + * @return This instance + */ + public SSEAsmBuilder movqMR(Register r, Register m, OptionalInt disp){ + return movQInternal(r, m, disp, (byte)0x7e); } } diff --git a/src/test/java/com/yasuenag/ffmasm/test/amd64/SSEAsmTest.java b/src/test/java/com/yasuenag/ffmasm/test/amd64/SSEAsmTest.java index 82740c6..d33477c 100644 --- a/src/test/java/com/yasuenag/ffmasm/test/amd64/SSEAsmTest.java +++ b/src/test/java/com/yasuenag/ffmasm/test/amd64/SSEAsmTest.java @@ -182,4 +182,75 @@ public void testMOVD_B(){ } } + /** + * Tests MOVQ A + */ + @Test + @EnabledOnOs({OS.LINUX, OS.WINDOWS}) + public void testMOVQ_A(){ + try(var seg = new CodeSegment()){ + var desc = FunctionDescriptor.of( + ValueLayout.JAVA_DOUBLE, // return value + ValueLayout.ADDRESS // 1st argument + ); + var method = AMD64AsmBuilder.create(SSEAsmBuilder.class, seg, desc) + /* push %rbp */ .push(Register.RBP) + /* mov %rsp, %rbp */ .movMR(Register.RSP, Register.RBP, OptionalInt.empty()) + .cast(SSEAsmBuilder.class) + /* movq (arg1), %xmm0 */ .movqRM(Register.XMM0, argReg.arg1(), OptionalInt.of(0)) + /* leave */ .leave() + /* ret */ .ret() + .build(); + + double expected = 1.1d; + var arena = Arena.ofAuto(); + MemorySegment src = arena.allocate(ValueLayout.JAVA_DOUBLE); + src.set(ValueLayout.JAVA_DOUBLE, 0, expected); + + double actual = (double)method.invoke(src); + + Assertions.assertEquals(expected, 
actual); + } + catch(Throwable t){ + Assertions.fail(t); + } + } + + /** + * Tests MOVQ B + */ + @Test + @EnabledOnOs({OS.LINUX, OS.WINDOWS}) + public void testMOVQ_B(){ + try(var seg = new CodeSegment()){ + var arena = Arena.ofAuto(); + MemorySegment dest = arena.allocate(ValueLayout.JAVA_DOUBLE); + + var desc = FunctionDescriptor.ofVoid( + ValueLayout.JAVA_DOUBLE // 1st argument + ); + var method = AMD64AsmBuilder.create(SSEAsmBuilder.class, seg, desc) + /* push %rbp */ .push(Register.RBP) + /* mov %rsp, %rbp */ .movMR(Register.RSP, Register.RBP, OptionalInt.empty()) + // Mixed argument order (int, fp) is different between Windows and Linux. + // Thus address is loaded from immediate value. + /* mov addr, %rax */ .movImm(Register.RAX, dest.address()) + .cast(SSEAsmBuilder.class) + /* movq %xmm0, (%rax) */ .movqMR(Register.XMM0, Register.RAX, OptionalInt.of(0)) + /* leave */ .leave() + /* ret */ .ret() + .build(); + + double expected = 1.1d; + + method.invoke(expected); + double actual = dest.get(ValueLayout.JAVA_DOUBLE, 0); + + Assertions.assertEquals(expected, actual); + } + catch(Throwable t){ + Assertions.fail(t); + } + } + }