Skip to content

Commit

Permalink
Add MOVDQA/MOVDQU on SSE2
Browse files Browse the repository at this point in the history
  • Loading branch information
YaSuenag committed Dec 25, 2024
1 parent 64c3bf0 commit 970581c
Show file tree
Hide file tree
Showing 3 changed files with 197 additions and 3 deletions.
4 changes: 2 additions & 2 deletions src/main/java/com/yasuenag/ffmasm/amd64/AMD64AsmBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ else if(dispAsInt <= 0xff){
return mode;
}

private void emitDisp(byte mode, OptionalInt disp, Register m){
protected void emitDisp(byte mode, OptionalInt disp, Register m){
if((mode != 0b11) && (m == Register.RSP)){
// We should add SIB byte.
//
Expand Down Expand Up @@ -255,7 +255,7 @@ else if(reg.width() == 64){
return this;
}

private void emitREXOp(Register r, Register m){
protected void emitREXOp(Register r, Register m){
if(r.width() == 16){
// Ops for 16 bits operands (66H)
byteBuf.put((byte)0x66);
Expand Down
82 changes: 81 additions & 1 deletion src/main/java/com/yasuenag/ffmasm/amd64/SSEAsmBuilder.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 Yasumasa Suenaga
* Copyright (C) 2022, 2024, Yasumasa Suenaga
*
* This file is part of ffmasm.
*
Expand All @@ -19,6 +19,7 @@
package com.yasuenag.ffmasm.amd64;

import java.lang.foreign.FunctionDescriptor;
import java.util.OptionalInt;

import com.yasuenag.ffmasm.CodeSegment;
import com.yasuenag.ffmasm.UnsupportedPlatformException;
Expand All @@ -41,4 +42,83 @@ protected SSEAsmBuilder(CodeSegment seg, FunctionDescriptor desc){
super(seg, desc);
}

/**
 * Emits one MOVDQA/MOVDQU style instruction into the code buffer.
 * Bytes are written in this order: the given prefix, whatever
 * emitREXOp() produces for the operands, the 0F escape opcode,
 * the second opcode, ModR/M, and finally whatever emitDisp() adds.
 *
 * @param r "r" register
 * @param m "r/m" register
 * @param disp Displacement; handed to emitModRM() and emitDisp() as is
 * @param prefix Prefix byte written before everything else
 * @param secondOpcode Opcode byte that follows the 0F escape opcode
 * @return This instance
 */
private SSEAsmBuilder movdq(Register r, Register m, OptionalInt disp, byte prefix, byte secondOpcode){
  final byte escapeOpcode = (byte)0x0f;

  byteBuf.put(prefix);
  emitREXOp(r, m);
  byteBuf.put(escapeOpcode);
  byteBuf.put(secondOpcode);
  // emitModRM() hands back the mode it used; emitDisp() needs that value.
  emitDisp(emitModRM(r, m, disp), disp, m);

  return this;
}

/**
 * Move aligned packed integer values from xmm2/mem to xmm1.
 *   Opcode: 66 0F 6F /r
 *   Instruction: MOVDQA xmm1, xmm2/m128
 *   Op/En: A
 *
 * @param r "r" register
 * @param m "r/m" register
 * @param disp Displacement. Set "empty" if this operation is reg-reg;
 *             in that case "r/m" has to be a SIMD register.
 *             Otherwise "r/m" has to be a 64 bit GPR because it has to
 *             be a memory operand.
 * @return This instance
 */
public SSEAsmBuilder movdqaRM(Register r, Register m, OptionalInt disp){
  final byte prefix = (byte)0x66;
  final byte opcode = (byte)0x6f;
  return movdq(r, m, disp, prefix, opcode);
}

/**
 * Move aligned packed integer values from xmm1 to xmm2/mem.
 *   Opcode: 66 0F 7F /r
 *   Instruction: MOVDQA xmm2/m128, xmm1
 *   Op/En: B
 *
 * @param r "r" register
 * @param m "r/m" register
 * @param disp Displacement. Set "empty" if this operation is reg-reg;
 *             in that case "r/m" has to be a SIMD register.
 *             Otherwise "r/m" has to be a 64 bit GPR because it has to
 *             be a memory operand.
 * @return This instance
 */
public SSEAsmBuilder movdqaMR(Register r, Register m, OptionalInt disp){
  final byte prefix = (byte)0x66;
  final byte opcode = (byte)0x7f;
  return movdq(r, m, disp, prefix, opcode);
}

/**
 * Move unaligned packed integer values from xmm2/mem128 to xmm1.
 *   Opcode: F3 0F 6F /r
 *   Instruction: MOVDQU xmm1, xmm2/m128
 *   Op/En: A
 *
 * @param r "r" register
 * @param m "r/m" register
 * @param disp Displacement. Set "empty" if this operation is reg-reg;
 *             in that case "r/m" has to be a SIMD register.
 *             Otherwise "r/m" has to be a 64 bit GPR because it has to
 *             be a memory operand.
 * @return This instance
 */
public SSEAsmBuilder movdquRM(Register r, Register m, OptionalInt disp){
  final byte prefix = (byte)0xf3;
  final byte opcode = (byte)0x6f;
  return movdq(r, m, disp, prefix, opcode);
}

/**
 * Move unaligned packed integer values from xmm1 to xmm2/mem128.
 *   Opcode: F3 0F 7F /r
 *   Instruction: MOVDQU xmm2/m128, xmm1
 *   Op/En: B
 *
 * @param r "r" register
 * @param m "r/m" register
 * @param disp Displacement. Set "empty" if this operation is reg-reg;
 *             in that case "r/m" has to be a SIMD register.
 *             Otherwise "r/m" has to be a 64 bit GPR because it has to
 *             be a memory operand.
 * @return This instance
 */
public SSEAsmBuilder movdquMR(Register r, Register m, OptionalInt disp){
  final byte prefix = (byte)0xf3;
  final byte opcode = (byte)0x7f;
  return movdq(r, m, disp, prefix, opcode);
}

}
114 changes: 114 additions & 0 deletions src/test/java/com/yasuenag/ffmasm/test/amd64/SSEAsmTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
/*
* Copyright (C) 2024, Yasumasa Suenaga
*
* This file is part of ffmasm.
*
* ffmasm is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* ffmasm is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with ffmasm. If not, see <http://www.gnu.org/licenses/>.
*/
package com.yasuenag.ffmasm.test.amd64;

import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.condition.EnabledOnOs;
import org.junit.jupiter.api.condition.OS;

import java.lang.foreign.Arena;
import java.lang.foreign.FunctionDescriptor;
import java.lang.foreign.MemorySegment;
import java.lang.foreign.ValueLayout;
import java.util.OptionalInt;

import com.yasuenag.ffmasm.CodeSegment;
import com.yasuenag.ffmasm.amd64.AMD64AsmBuilder;
import com.yasuenag.ffmasm.amd64.Register;
import com.yasuenag.ffmasm.amd64.SSEAsmBuilder;


public class SSEAsmTest extends TestBase{

/**
* Tests MOVDQA A/B
*/
@Test
@EnabledOnOs({OS.LINUX, OS.WINDOWS})
public void testMOVDQA(){
try(var seg = new CodeSegment()){
var desc = FunctionDescriptor.ofVoid(
ValueLayout.ADDRESS, // 1st argument
ValueLayout.ADDRESS // 2nd argument
);
var method = AMD64AsmBuilder.create(SSEAsmBuilder.class, seg, desc)
/* push %rbp */ .push(Register.RBP)
/* mov %rsp, %rbp */ .movMR(Register.RSP, Register.RBP, OptionalInt.empty())
.cast(SSEAsmBuilder.class)
/* movdqa (arg1), %xmm0 */ .movdqaRM(Register.XMM0, argReg.arg1(), OptionalInt.of(0))
/* movdqa %xmm0, (arg2) */ .movdqaMR(Register.XMM0, argReg.arg2(), OptionalInt.of(0))
/* leave */ .leave()
/* ret */ .ret()
.build();

long[] expected = new long[]{1, 2}; // 64 * 2 = 128 bit
var arena = Arena.ofAuto();
MemorySegment src = arena.allocate(16, 16); // 128 bit
MemorySegment dest = arena.allocate(16, 16); // 128 bit
MemorySegment.copy(expected, 0, src, ValueLayout.JAVA_LONG, 0, expected.length);

method.invoke(src, dest);

Assertions.assertArrayEquals(expected, src.toArray(ValueLayout.JAVA_LONG));
Assertions.assertArrayEquals(expected, dest.toArray(ValueLayout.JAVA_LONG));
}
catch(Throwable t){
Assertions.fail(t);
}
}

/**
* Tests MOVDQU A/B
*/
@Test
@EnabledOnOs({OS.LINUX, OS.WINDOWS})
public void testMOVDQU(){
try(var seg = new CodeSegment()){
var desc = FunctionDescriptor.ofVoid(
ValueLayout.ADDRESS, // 1st argument
ValueLayout.ADDRESS // 2nd argument
);
var method = AMD64AsmBuilder.create(SSEAsmBuilder.class, seg, desc)
/* push %rbp */ .push(Register.RBP)
/* mov %rsp, %rbp */ .movMR(Register.RSP, Register.RBP, OptionalInt.empty())
.cast(SSEAsmBuilder.class)
/* movdqu (arg1), %xmm0 */ .movdquRM(Register.XMM0, argReg.arg1(), OptionalInt.of(0))
/* movdqu %xmm0, (arg2) */ .movdquMR(Register.XMM0, argReg.arg2(), OptionalInt.of(0))
/* leave */ .leave()
/* ret */ .ret()
.build();

long[] expected = new long[]{1, 2}; // 64 * 2 = 128 bit
var arena = Arena.ofAuto();
MemorySegment src = arena.allocate(16, 16); // 128 bit
MemorySegment dest = arena.allocate(16, 16); // 128 bit
MemorySegment.copy(expected, 0, src, ValueLayout.JAVA_LONG, 0, expected.length);

method.invoke(src, dest);

Assertions.assertArrayEquals(expected, src.toArray(ValueLayout.JAVA_LONG));
Assertions.assertArrayEquals(expected, dest.toArray(ValueLayout.JAVA_LONG));
}
catch(Throwable t){
Assertions.fail(t);
}
}

}

0 comments on commit 970581c

Please sign in to comment.