Skip to content

Commit

Permalink
Add VZEROUPPER
Browse files Browse the repository at this point in the history
  • Loading branch information
YaSuenag committed Jan 16, 2025
1 parent ee169b9 commit daa537b
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 4 deletions.
24 changes: 21 additions & 3 deletions src/main/java/com/yasuenag/ffmasm/amd64/AVXAsmBuilder.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (C) 2022, 2024, Yasumasa Suenaga
* Copyright (C) 2022, 2025, Yasumasa Suenaga
*
* This file is part of ffmasm.
*
Expand Down Expand Up @@ -77,12 +77,16 @@ public byte bytes(){

/**
 * Emits a 2-byte VEX prefix whose VEX.vvvv field is taken from the given register.
 *
 * @param src1 register whose encoding is bit-inverted and truncated to 4 bits
 *             to form VEX.vvvv; a width of 256 selects the 256-bit vector length
 * @param simdPrefix SIMD prefix forwarded unchanged to the prefix emitter
 */
private void emit2ByteVEXPrefix(Register src1, PP simdPrefix){
  // VEX.vvvv carries the register number in inverted form, low 4 bits only.
  var invertedEncoding = ~src1.encoding();
  final byte vvvv = (byte)(invertedEncoding & 0b1111);
  final boolean wide = src1.width() == 256;
  emit2ByteVEXPrefixWithVVVV(vvvv, wide, simdPrefix);
}

/**
 * Emits a 2-byte VEX prefix: the escape byte 0xC5 followed by one payload byte.
 *
 * The payload byte is assembled as:
 *   bit 7     : R bit, taken from the most significant bit of the 4-bit VEXvvvv value
 *   bits 6..3 : VEXvvvv, written as given (callers pass the already inverted value)
 *   bit 2     : vector length (1 when is256bit is true, otherwise 0)
 *   low bits  : simdPrefix.prefix(), OR'ed in as-is
 *
 * @param VEXvvvv value for the VEX.vvvv field; expected to fit in 4 bits
 * @param is256bit true to set the vector length bit
 * @param simdPrefix SIMD prefix (opcode extension)
 */
private void emit2ByteVEXPrefixWithVVVV(byte VEXvvvv, boolean is256bit, PP simdPrefix){
  byte rexr = (byte)((VEXvvvv >> 3) & 1);
  // Vector length comes from the is256bit parameter only; no register is in scope here.
  byte vecLength = is256bit ? (byte)1 : (byte)0;
  byteBuf.put((byte)0xC5); // 2-byte VEX
  byteBuf.put((byte)(     (rexr << 7) | // REX.R
                       (VEXvvvv << 3) | // VEX.vvvv
                     (vecLength << 2) | // Vector Length
                  simdPrefix.prefix()   // opcode extension (SIMD prefix)
             ));
}
Expand Down Expand Up @@ -305,4 +309,18 @@ else if(mode == 0b10){ // reg-mem disp32
return this;
}

/**
 * Zero bits in positions 128 and higher of some YMM and ZMM registers.
 *   Opcode: VEX.128.0F.WIG 77
 *   Instruction: VZEROUPPER
 *   Op/En: ZO
 *
 * The instruction takes no operands, so the prefix is emitted with
 * VEX.vvvv set to 0b1111, the 128-bit vector length and no SIMD prefix.
 *
 * @return This instance
 */
public AVXAsmBuilder vzeroupper(){
  final byte unusedVvvv = (byte)0b1111;
  final byte opcode = (byte)0x77; // VZEROUPPER

  emit2ByteVEXPrefixWithVVVV(unusedVvvv, false, PP.None);
  byteBuf.put(opcode);
  return this;
}

}
37 changes: 36 additions & 1 deletion src/test/java/com/yasuenag/ffmasm/test/amd64/AVXAsmTest.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (C) 2022, 2023, Yasumasa Suenaga
* Copyright (C) 2022, 2025, Yasumasa Suenaga
*
* This file is part of ffmasm.
*
Expand Down Expand Up @@ -243,4 +243,39 @@ public void testPTEST(){
}
}

/**
 * Tests VZEROUPPER.
 *
 * Loads 32 bytes of 0xff into YMM0, executes VZEROUPPER and stores YMM0
 * back to the same buffer. The lower 16 bytes must be unchanged and the
 * upper 16 bytes must be zero.
 */
@Test
@EnabledOnOs({OS.LINUX, OS.WINDOWS})
public void testVZEROUPPER(){
  try(var seg = new CodeSegment()){
    var method = AMD64AsmBuilder.create(AVXAsmBuilder.class, seg, FunctionDescriptor.ofVoid(ValueLayout.ADDRESS))
        /* push %rbp                 */ .push(Register.RBP)
        /* mov %rsp, %rbp            */ .movMR(Register.RSP, Register.RBP, OptionalInt.empty())
                                        .cast(AVXAsmBuilder.class)
        /* vmovdqa (arg1), %ymm0     */ .vmovdqaMR(Register.YMM0, argReg.arg1(), OptionalInt.of(0))
        /* vzeroupper                */ .vzeroupper()
        /* vmovdqa %ymm0, (arg1)     */ .vmovdqaRM(Register.YMM0, argReg.arg1(), OptionalInt.of(0))
        /* leave                     */ .leave()
        /* ret                       */ .ret()
                                        .build();

    // 32 bytes with 32-byte alignment, every bit set before the call.
    var buffer = Arena.ofAuto().allocate(32, 32);
    buffer.fill((byte)0xff);

    //showDebugMessage(seg);
    method.invoke(buffer);

    final long allOnes = 0xffffffffffffffffL;
    long[] expected = {allOnes, allOnes, 0L, 0L};
    long[] actual = buffer.toArray(ValueLayout.JAVA_LONG);

    Assertions.assertArrayEquals(expected, actual);
  }
  catch(Throwable t){
    Assertions.fail(t);
  }
}

}

0 comments on commit daa537b

Please sign in to comment.