A bunch more ARM64 encodings (*MIN,*MAX,XTN,SHRN, etc)

This commit is contained in:
Henrik Rydgard 2015-07-11 12:16:26 +02:00
parent f50828a66a
commit 72651835c3
4 changed files with 211 additions and 50 deletions

View file

@ -254,6 +254,16 @@ bool IsImmLogical(uint64_t value, unsigned int width, unsigned int *n, unsigned
return true; return true;
} }
// Converts an element width in bits (8/16/32/64) into the 0..3 value that the
// integer min/max emitters pass as the size argument of EmitThreeSame.
// Any width that is not 16, 32 or 64 - including 8 - yields 0.
static int EncodeSize(int size) {
	if (size == 16)
		return 1;
	if (size == 32)
		return 2;
	if (size == 64)
		return 3;
	// 8 and every unrecognized width share the same encoding.
	return 0;
}
void ARM64XEmitter::SetCodePtr(u8* ptr) void ARM64XEmitter::SetCodePtr(u8* ptr)
{ {
m_code = ptr; m_code = ptr;
@ -3074,6 +3084,22 @@ void ARM64FloatEmitter::FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{ {
EmitThreeSame(1, size >> 6, 0x1B, Rd, Rn, Rm); EmitThreeSame(1, size >> 6, 0x1B, Rd, Rn, Rm);
} }
// Emits UMIN for vector registers. `size` is the element width in bits and is
// translated to the instruction's size field by EncodeSize().
void ARM64FloatEmitter::UMIN(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
	const int size_field = EncodeSize(size);
	// First argument 1 distinguishes this from SMIN, which uses 0 with the same 0xD opcode.
	EmitThreeSame(1, size_field, 0xD, Rd, Rn, Rm);
}
// Emits UMAX for vector registers. `size` is the element width in bits and is
// translated to the instruction's size field by EncodeSize().
void ARM64FloatEmitter::UMAX(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
	const int size_field = EncodeSize(size);
	// First argument 1 distinguishes this from SMAX, which uses 0 with the same 0xC opcode.
	EmitThreeSame(1, size_field, 0xC, Rd, Rn, Rm);
}
// Emits SMIN for vector registers. `size` is the element width in bits and is
// translated to the instruction's size field by EncodeSize().
void ARM64FloatEmitter::SMIN(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
	const int size_field = EncodeSize(size);
	// First argument 0 distinguishes this from UMIN, which uses 1 with the same 0xD opcode.
	EmitThreeSame(0, size_field, 0xD, Rd, Rn, Rm);
}
// Emits SMAX for vector registers. `size` is the element width in bits and is
// translated to the instruction's size field by EncodeSize().
void ARM64FloatEmitter::SMAX(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
	const int size_field = EncodeSize(size);
	// First argument 0 distinguishes this from UMAX, which uses 1 with the same 0xC opcode.
	EmitThreeSame(0, size_field, 0xC, Rd, Rn, Rm);
}
void ARM64FloatEmitter::FNEG(u8 size, ARM64Reg Rd, ARM64Reg Rn) void ARM64FloatEmitter::FNEG(u8 size, ARM64Reg Rd, ARM64Reg Rn)
{ {
Emit2RegMisc(IsQuad(Rd), 1, 2 | (size >> 6), 0xF, Rd, Rn); Emit2RegMisc(IsQuad(Rd), 1, 2 | (size >> 6), 0xF, Rd, Rn);
@ -3487,67 +3513,51 @@ void ARM64FloatEmitter::UXTL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn)
UXTL(src_size, Rd, Rn, true); UXTL(src_size, Rd, Rn, true);
} }
// Builds the combined shift immediate for a left shift: element size plus the
// shift amount. Callers split the result with `>> 3` and `& 7` before handing
// the two parts to EmitShiftImm.
static uint32 EncodeImmShiftLeft(u8 src_size, u32 shift) {
	const u32 encoded = src_size + shift;
	return encoded;
}
// Builds the combined shift immediate for a right shift: twice the element
// size minus the shift amount. Callers split the result with `>> 3` and `& 7`
// before handing the two parts to EmitShiftImm.
static uint32 EncodeImmShiftRight(u8 src_size, u32 shift) {
	const u32 twice_size = src_size * 2;
	return twice_size - shift;
}
void ARM64FloatEmitter::SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper) void ARM64FloatEmitter::SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper)
{ {
_assert_msg_(DYNA_REC, shift < src_size, "%s shift amount must less than the element size!", __FUNCTION__); _assert_msg_(DYNA_REC, shift < src_size, "%s shift amount must less than the element size!", __FUNCTION__);
u32 immh = 0; u32 imm = EncodeImmShiftLeft(src_size, shift);
u32 immb = shift & 0xFFF; EmitShiftImm(upper, 0, imm >> 3, imm & 7, 0x14, Rd, Rn);
if (src_size == 8)
{
immh = 1;
}
else if (src_size == 16)
{
immh = 2 | ((shift >> 3) & 1);
}
else if (src_size == 32)
{
immh = 4 | ((shift >> 3) & 3);;
}
EmitShiftImm(upper, 0, immh, immb, 0x14, Rd, Rn);
} }
void ARM64FloatEmitter::USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper) void ARM64FloatEmitter::USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper)
{ {
_assert_msg_(DYNA_REC, shift < src_size, "%s shift amount must less than the element size!", __FUNCTION__); _assert_msg_(DYNA_REC, shift < src_size, "%s shift amount must less than the element size!", __FUNCTION__);
u32 immh = 0; u32 imm = EncodeImmShiftLeft(src_size, shift);
u32 immb = shift & 0xFFF; EmitShiftImm(upper, 1, imm >> 3, imm & 7, 0x14, Rd, Rn);
if (src_size == 8)
{
immh = 1;
}
else if (src_size == 16)
{
immh = 2 | ((shift >> 3) & 1);
}
else if (src_size == 32)
{
immh = 4 | ((shift >> 3) & 3);;
}
EmitShiftImm(upper, 1, immh, immb, 0x14, Rd, Rn);
} }
void ARM64FloatEmitter::SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper) void ARM64FloatEmitter::SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper)
{ {
_assert_msg_(DYNA_REC, shift < dest_size, "%s shift amount must less than the element size!", __FUNCTION__); _assert_msg_(DYNA_REC, shift < dest_size, "%s shift amount must less than the element size!", __FUNCTION__);
u32 immh = 0; u32 imm = EncodeImmShiftRight(dest_size, shift);
u32 immb = shift & 0xFFF; EmitShiftImm(upper, 0, imm >> 3, imm & 7, 0x10, Rd, Rn);
}
if (dest_size == 8) void ARM64FloatEmitter::SHL(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift) {
{ _assert_msg_(DYNA_REC, shift < dest_size, "%s shift amount must less than the element size!", __FUNCTION__);
immh = 1; u32 imm = EncodeImmShiftLeft(dest_size, shift);
} EmitShiftImm(IsQuad(Rd), false, imm >> 3, imm & 7, 0xA, Rd, Rn);
else if (dest_size == 16) }
{
immh = 2 | ((shift >> 3) & 1); void ARM64FloatEmitter::USHR(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift) {
} _assert_msg_(DYNA_REC, shift < dest_size, "%s shift amount must less than the element size!", __FUNCTION__);
else if (dest_size == 32) u32 imm = EncodeImmShiftRight(dest_size, shift);
{ EmitShiftImm(IsQuad(Rd), true, imm >> 3, imm & 7, 0x0, Rd, Rn);
immh = 4 | ((shift >> 3) & 3);; }
}
EmitShiftImm(upper, 1, immh, immb, 0x10, Rd, Rn); void ARM64FloatEmitter::SSHR(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift) {
_assert_msg_(DYNA_REC, shift < dest_size, "%s shift amount must less than the element size!", __FUNCTION__);
u32 imm = EncodeImmShiftRight(dest_size, shift);
EmitShiftImm(IsQuad(Rd), false, imm >> 3, imm & 7, 0x0, Rd, Rn);
} }
void ARM64FloatEmitter::SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper) void ARM64FloatEmitter::SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper)

View file

@ -842,6 +842,12 @@ public:
void MOV(ARM64Reg Rd, ARM64Reg Rn) { void MOV(ARM64Reg Rd, ARM64Reg Rn) {
ORR(Rd, Rn, Rn); ORR(Rd, Rn, Rn);
} }
// Integer vector min/max. `size` is the element width in bits; the
// definitions convert it with EncodeSize(), which recognizes 8, 16, 32 and 64.
void UMIN(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void UMAX(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void SMIN(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void SMAX(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
void REV16(u8 size, ARM64Reg Rd, ARM64Reg Rn); void REV16(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void REV32(u8 size, ARM64Reg Rd, ARM64Reg Rn); void REV32(u8 size, ARM64Reg Rd, ARM64Reg Rn);
void REV64(u8 size, ARM64Reg Rd, ARM64Reg Rn); void REV64(u8 size, ARM64Reg Rd, ARM64Reg Rn);
@ -917,6 +923,10 @@ public:
void UXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn); void UXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
void UXTL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn); void UXTL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
// Vector shifts by an immediate. The definitions assert that `shift` is
// smaller than the element size and name the first parameter `dest_size`.
void SHL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
void USHR(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
void SSHR(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
// vector x indexed element // vector x indexed element
void FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index); void FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index);
void FMLA(u8 esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index); void FMLA(u8 esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index);

View file

@ -561,7 +561,7 @@ static void FPandASIMD1(uint32_t w, uint64_t addr, Instruction *instr) {
opname = !(sz & 1) ? "bit" : "bif"; opname = !(sz & 1) ? "bit" : "bif";
} }
} }
int size = (fp ? ((sz & 1) ? 64 : 32) : (sz << 3)); int size = (fp ? ((sz & 1) ? 64 : 32) : (8 << sz));
if (opname != nullptr) { if (opname != nullptr) {
if (!nosize) { if (!nosize) {
@ -583,8 +583,109 @@ static void FPandASIMD1(uint32_t w, uint64_t addr, Instruction *instr) {
break; break;
case 2: case 2:
if (((w >> 17) & 0xf) == 0) { if (((w >> 17) & 0xf) == 0) {
// Very similar to scalar two-reg misc. can we share code? int opcode = (w >> 12) & 0x1F;
snprintf(instr->text, sizeof(instr->text), "(asimd vector two-reg misc %08x)", w); int sz = (w >> 22) & 3;
int Q = GetQ(w);
int U = GetU(w);
const char *opname = nullptr;
bool narrow = false;
if (!U) {
switch (opcode) {
case 0: opname = "rev64"; break;
case 1: opname = "rev16"; break;
case 2: opname = "saddlp"; break;
case 3: opname = "suqadd"; break;
case 4: opname = "cls"; break;
case 5: opname = "cnt"; break;
case 6: opname = "sadalp"; break;
case 7: opname = "sqabs"; break;
case 8: opname = "cmgt"; break;
case 9: opname = "cmeq"; break;
case 0xA: opname = "cmlt"; break;
case 0xB: opname = "abs"; break;
case 0x12: opname = "xtn"; narrow = true; break;
case 0x14: opname = "sqxtn"; narrow = true; break;
default:
if (!(sz & 0x2)) {
switch (opcode) {
case 0x16: opname = "fcvtn"; break;
case 0x17: opname = "fcvtl"; break;
case 0x18: opname = "frintn"; break;
case 0x19: opname = "frintm"; break;
case 0x1a: opname = "fcvtns"; break;
case 0x1b: opname = "fcvtms"; break;
case 0x1c: opname = "fcvtas"; break;
case 0x1d: opname = "scvtf"; break;
}
} else {
// Mnemonics for the U == 0 vector two-register misc group when bit 1 of sz is set.
// Opcodes without an entry leave opname as it was, so the caller falls back to
// the raw-hex text.
switch (opcode) {
case 0xc: opname = "fcmgt"; break;
case 0xd: opname = "fcmeq"; break;
case 0xe: opname = "fcmlt"; break;
case 0xf: opname = "fabs"; break;
case 0x18: opname = "frintp"; break;
case 0x19: opname = "frintz"; break;
case 0x1a: opname = "fcvtps"; break;
case 0x1b: opname = "fcvtzs"; break;
// Reciprocal estimate: the ISA mnemonics are URECPE / FRECPE.
case 0x1c: opname = "urecpe"; break;
case 0x1d: opname = "frecpe"; break;
}
}
}
} else {
switch (opcode) {
case 0: opname = "rev32"; break;
case 2: opname = "uaddlp"; break;
case 3: opname = "usqadd"; break;
case 4: opname = "clz"; break;
case 6: opname = "uadalp"; break;
case 7: opname = "sqneg"; break;
case 8: opname = "cmge"; break; // with zero
case 9: opname = "cmle"; break; // with zero
case 0xB: opname = "neg"; break;
case 0x12: opname = "sqxtun"; narrow = true; break;
case 0x13: opname = "shll"; break;
case 0x14: opname = "uqxtn"; narrow = true; break;
case 5: if (sz == 0) opname = "not"; else opname = "rbit"; break;
default:
if (!(sz & 0x2)) {
switch (opcode) {
case 0x16: opname = "fcvtxn"; break;
case 0x18: opname = "frinta"; break;
case 0x19: opname = "frintx"; break;
case 0x1a: opname = "fcvtnu"; break;
case 0x1b: opname = "fcvtmu"; break;
case 0x1c: opname = "fcvtau"; break;
case 0x1d: opname = "ucvtf"; break;
}
} else {
// Mnemonics for the U == 1 vector two-register misc group when bit 1 of sz is set.
// Opcodes without an entry leave opname as it was, so the caller falls back to
// the raw-hex text.
switch (opcode) {
case 0xC: opname = "fcmge"; break; // with zero
// 0xD is FCMLE (compare less-than-or-equal to zero), not a second FCMGE.
case 0xD: opname = "fcmle"; break; // with zero
case 0xF: opname = "fneg"; break;
case 0x19: opname = "frinti"; break;
case 0x1a: opname = "fcvtpu"; break;
case 0x1b: opname = "fcvtzu"; break;
case 0x1c: opname = "ursqrte"; break;
case 0x1d: opname = "frsqrte"; break;
case 0x1f: opname = "fsqrt"; break;
}
}
}
}
if (opname) {
if (narrow) {
int esize = 8 << sz;
const char *two = ""; // todo
snprintf(instr->text, sizeof(instr->text), "%s%s.%d.%d d%d, q%d", opname, two, esize, esize * 2, Rd, Rn);
} else {
snprintf(instr->text, sizeof(instr->text), "%s", opname);
}
} else {
// Very similar to scalar two-reg misc. can we share code?
snprintf(instr->text, sizeof(instr->text), "(asimd vector two-reg misc %08x)", w);
}
} else if (((w >> 17) & 0xf) == 1) { } else if (((w >> 17) & 0xf) == 1) {
snprintf(instr->text, sizeof(instr->text), "(asimd across lanes %08x)", w); snprintf(instr->text, sizeof(instr->text), "(asimd across lanes %08x)", w);
} else { } else {
@ -653,6 +754,20 @@ static void FPandASIMD1(uint32_t w, uint64_t addr, Instruction *instr) {
int shift = 2 * esize - ((immh << 3) | immb); int shift = 2 * esize - ((immh << 3) | immb);
int r = Q ? 'q' : 'd'; int r = Q ? 'q' : 'd';
snprintf(instr->text, sizeof(instr->text), "%ccvtf %c%d.s, %c%d.s, #%d", U ? 'u' : 's', r, Rd, r, Rn, shift); snprintf(instr->text, sizeof(instr->text), "%ccvtf %c%d.s, %c%d.s, #%d", U ? 'u' : 's', r, Rd, r, Rn, shift);
} else if (opname && (!strcmp(opname, "ushr") || !strcmp(opname, "sshr"))) {
int esize = (8 << HighestSetBit(immh));
int shift = esize * 2 - ((immh << 3) | immb);
int r = Q ? 'q' : 'd';
snprintf(instr->text, sizeof(instr->text), "%s.%d %c%d, %c%d, #%d", opname, esize, r, Rd, r, Rn, shift);
} else if (opname && (!strcmp(opname, "rshrn") || !strcmp(opname, "shrn"))) {
int esize = (8 << HighestSetBit(immh));
int shift = esize * 2 - ((immh << 3) | immb);
snprintf(instr->text, sizeof(instr->text), "%s%s.%d.%d d%d, q%d, #%d", opname, two, esize, esize * 2, Rd, Rn, shift);
} else if (opname && (!strcmp(opname, "shl"))) {
int esize = (8 << HighestSetBit(immh));
int r = Q ? 'q' : 'd';
int shift = ((immh << 3) | immb) - esize;
snprintf(instr->text, sizeof(instr->text), "%s.%d %c%d, %c%d, #%d", opname, esize, r, Rd, r, Rn, shift);
} else if (opname) { } else if (opname) {
int esize = (8 << HighestSetBit(immh)); int esize = (8 << HighestSetBit(immh));
int shift = ((immh << 3) | immb) - esize; int shift = ((immh << 3) | immb) - esize;

View file

@ -39,6 +39,32 @@ bool TestArm64Emitter() {
ARM64XEmitter emitter((u8 *)code); ARM64XEmitter emitter((u8 *)code);
ARM64FloatEmitter fp(&emitter); ARM64FloatEmitter fp(&emitter);
fp.SHRN(32, D0, Q3, 8);
RET(CheckLast(emitter, "0f388460 shrn.32.64 d0, q3, #8"));
fp.SHRN(8, D0, Q3, 4);
RET(CheckLast(emitter, "0f0c8460 shrn.8.16 d0, q3, #4"));
fp.XTN(32, D0, Q3);
RET(CheckLast(emitter, "0ea12860 xtn.32.64 d0, q3"));
fp.XTN(8, D4, Q1);
RET(CheckLast(emitter, "0e212824 xtn.8.16 d4, q1"));
fp.UMIN(32, D0, D3, D4);
RET(CheckLast(emitter, "2ea46c60 umin.32 d0, d3, d4"));
fp.UMAX(16, Q0, Q3, Q4);
RET(CheckLast(emitter, "6e646460 umax.16 q0, q3, q4"));
fp.SMIN(8, D0, D3, D4);
RET(CheckLast(emitter, "0e246c60 smin.8 d0, d3, d4"));
fp.SMAX(16, D0, D3, D4);
RET(CheckLast(emitter, "0e646460 smax.16 d0, d3, d4"));
fp.SHL(32, D0, D3, 18);
RET(CheckLast(emitter, "0f325460 shl.32 d0, d3, #18"));
fp.USHR(16, Q0, Q3, 7);
RET(CheckLast(emitter, "6f190460 ushr.16 q0, q3, #7"));
fp.SSHR(64, Q0, Q3, 38);
RET(CheckLast(emitter, "4f5a0460 sshr.64 q0, q3, #38"));
emitter.LDRH(INDEX_UNSIGNED, W3, X7, 18); emitter.LDRH(INDEX_UNSIGNED, W3, X7, 18);
RET(CheckLast(emitter, "794024e3 ldrh w3, [x7, #18]")); RET(CheckLast(emitter, "794024e3 ldrh w3, [x7, #18]"));
emitter.LDRSH(INDEX_UNSIGNED, W3, X7, 18); emitter.LDRSH(INDEX_UNSIGNED, W3, X7, 18);