ARM64: Fixes to emitter and disassembly for logical immediates

This commit is contained in:
Henrik Rydgard 2015-03-18 14:44:01 +01:00
parent 0922db6062
commit d3669daba4
3 changed files with 101 additions and 17 deletions

View file

@ -22,7 +22,7 @@ const int kXRegSizeInBits = 64;
int CountLeadingZeros(uint64_t value, int width) {
// TODO(jbramley): Optimize this for ARM64 hosts.
int count = 0;
uint64_t bit_test = 1UL << (width - 1);
uint64_t bit_test = 1ULL << (width - 1);
while ((count < width) && ((bit_test & value) == 0)) {
count++;
bit_test >>= 1;
@ -723,7 +723,7 @@ void ARM64XEmitter::EncodeAddSubImmInst(u32 op, bool flags, u32 shift, u32 imm,
(imm << 10) | (Rn << 5) | Rd);
}
void ARM64XEmitter::EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert)
void ARM64XEmitter::EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, int n)
{
// Sometimes Rd is fixed to SP, but can still be 32bit or 64bit.
// Use Rn to determine bitness here.
@ -732,7 +732,7 @@ void ARM64XEmitter::EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 i
Rd = DecodeReg(Rd);
Rn = DecodeReg(Rn);
Write32((b64Bit << 31) | (op << 29) | (0x24 << 23) | (b64Bit << 22) | (invert << 21) | \
Write32((b64Bit << 31) | (op << 29) | (0x24 << 23) | (n << 22) | \
(immr << 16) | (imms << 10) | (Rn << 5) | Rd);
}
@ -1382,7 +1382,11 @@ void ARM64XEmitter::BICS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shif
}
// Register-to-register move, emitted as an ORR (shifted register, LSL #0) whose
// first source operand is SP/WSP, chosen by the width of Rd.
// NOTE(review): SP/WSP presumably stands in for the zero register in this
// encoding (MOVI2R names the same expression "ZR") - confirm.
void ARM64XEmitter::MOV(ARM64Reg Rd, ARM64Reg Rm)
{
// NOTE(review): this view interleaves removed and added lines; the unguarded ORR
// directly below looks like the pre-change line that the guarded form replaces - confirm.
ORR(Rd, Is64Bit(Rd) ? SP : WSP, Rm, ArithOption(Rm, ST_LSL, 0));
// Only emit the move when both operands pass IsGPR(); anything else asserts.
if (IsGPR(Rd) && IsGPR(Rm)) {
ORR(Rd, Is64Bit(Rd) ? SP : WSP, Rm, ArithOption(Rm, ST_LSL, 0));
} else {
_assert_msg_(JIT, false, "Non-GPRs not supported in MOV");
}
}
void ARM64XEmitter::MVN(ARM64Reg Rd, ARM64Reg Rm)
{
@ -1797,7 +1801,7 @@ void ARM64XEmitter::ADRP(ARM64Reg Rd, s32 imm)
EncodeAddressInst(1, Rd, imm >> 12);
}
// Wrapper around MOVZ+MOVK
// Wrapper around MOVZ+MOVK (and later MOVN)
void ARM64XEmitter::MOVI2R(ARM64Reg Rd, u64 imm, bool optimize)
{
unsigned int parts = Is64Bit(Rd) ? 4 : 2;
@ -1816,13 +1820,22 @@ void ARM64XEmitter::MOVI2R(ARM64Reg Rd, u64 imm, bool optimize)
if ((Is64Bit(Rd) && imm == std::numeric_limits<u64>::max()) ||
(!Is64Bit(Rd) && imm == std::numeric_limits<u32>::max()))
{
// Max unsigned value
// Max unsigned value (or if signed, -1)
// Set to ~ZR
ARM64Reg ZR = Is64Bit(Rd) ? SP : WSP;
ORN(Rd, ZR, ZR, ArithOption(ZR, ST_LSL, 0));
return;
}
// TODO: Make more systematic use of MOVN, but this will take care of most cases.
// Small negative integer. Use MOVN
if (!Is64Bit(Rd) && (imm | 0xFFFF0000) == imm) {
MOVN(Rd, ~imm, SHIFT_0);
return;
}
// XXX: Use MOVN when possible.
// XXX: Optimize more
// XXX: Support rotating immediates to save instructions
if (optimize)

View file

@ -87,6 +87,10 @@ inline bool IsSingle(ARM64Reg reg) { return (reg & 0xC0) == 0x40; }
inline bool IsDouble(ARM64Reg reg) { return (reg & 0xC0) == 0x80; }
inline bool IsQuad(ARM64Reg reg) { return (reg & 0xC0) == 0xC0; }
inline bool IsVector(ARM64Reg reg) { return (reg & 0xC0) != 0; }
inline bool IsGPR(ARM64Reg reg) { return (int)reg < 0x40; }
int CountLeadingZeros(uint64_t value, int width);
inline ARM64Reg DecodeReg(ARM64Reg reg) { return (ARM64Reg)(reg & 0x1F); }
inline ARM64Reg EncodeRegTo64(ARM64Reg reg) { return (ARM64Reg)(reg | 0x20); }
inline ARM64Reg EncodeRegToDouble(ARM64Reg reg) { return (ARM64Reg)((reg & ~0xC0) | 0x80); }
@ -349,7 +353,7 @@ private:
void EncodeBitfieldMOVInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
void EncodeLoadStoreRegisterOffset(u32 size, u32 opc, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
void EncodeAddSubImmInst(u32 op, bool flags, u32 shift, u32 imm, ARM64Reg Rn, ARM64Reg Rd);
void EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert);
void EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, int n);
void EncodeLoadStorePair(u32 op, u32 load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
void EncodeAddressInst(u32 op, ARM64Reg Rd, s32 imm);
void EncodeLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
@ -822,7 +826,6 @@ public:
// vector x indexed element
void FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index);
// ABI related
void ABI_PushRegisters(BitSet32 registers);
void ABI_PopRegisters(BitSet32 registers, BitSet32 ignore_mask = BitSet32(0));

View file

@ -49,6 +49,60 @@ int SignExtend12(int x) {
return (x & 0x00000800) ? (0xFFFFF000 | x) : (x & 0xFFF);
}
// Returns the index (0..31) of the most significant set bit of `value`.
// Returns 0 when no bit is set, so a result of 0 is ambiguous between
// "only bit 0 is set" and "value is zero".
int HighestSetBit(int value) {
	const uint32_t bits = static_cast<uint32_t>(value);
	// Scan from the top; the first hit is the answer. Bit 0 needs no check
	// because the fallback result is 0 either way.
	for (int index = 31; index > 0; --index) {
		if (bits & (1u << index))
			return index;
	}
	return 0;
}
// Returns a value whose low `len` bits are set and all other bits are clear.
// len == 64 is handled separately because shifting a 64-bit value by 64 is
// not a valid operation; callers must keep len within 0..64.
uint64_t Ones(int len) {
	return (len == 64) ? ~0ULL : ((1ULL << len) - 1);
}
// Tiles the low `esize` bits of `value` across a 64-bit result, placing one
// copy at every multiple of `esize` below bit 64.
// `esize` must be positive; the loop advances by `esize` each step.
uint64_t Replicate(uint64_t value, int esize) {
	const uint64_t element = value & Ones(esize);
	uint64_t result = 0;
	int shift = 0;
	while (shift < 64) {
		result |= element << shift;
		shift += esize;
	}
	return result;
}
// Rotates `value` right by `amount` bits within an element of `esize` bits and
// returns the result masked to `esize` bits.
//
// value:  element to rotate; expected to fit in `esize` bits.
// amount: rotate distance, expected in 0..esize-1.
// esize:  element width in bits (up to 64).
uint64_t ROR(uint64_t value, int amount, int esize) {
	const uint64_t mask = Ones(esize);
	// A zero rotate must not reach the general expression: it would evaluate
	// value << esize, which is undefined when esize is 64.
	if (amount == 0) {
		return value & mask;
	}
	return ((value >> amount) | (value << (esize - amount))) & mask;
}
// Expands the (N, imms, immr) fields of an A64 logical-immediate / bitfield
// encoding into 64-bit masks.
//
// immN, imms, immr: raw instruction fields (1 bit, 6 bits, 6 bits).
// tmask: if non-null, receives the replicated element of (d + 1) ones.
// wmask: if non-null, receives the replicated element of (S + 1) ones rotated
//        right by R. This is the immediate value for logical instructions.
//
// Reserved encodings are not rejected: HighestSetBit() yields 0 for both "no
// bit set" and "bit 0 set", so the len < 1 case cannot be reported from here.
void DecodeBitMasks(int immN, int imms, int immr, uint64_t *tmask, uint64_t *wmask) {
	// Compute log2 of element size
	// 2^len must be in range [2, M]
	const int len = HighestSetBit((immN << 6) | ((~imms) & 0x3f));
	// if len < 1 then ReservedValue();
	// assert M >= (1 << len);

	// Determine S, R and S - R parameters
	const uint32_t levels = static_cast<uint32_t>(Ones(len));
	const uint32_t S = imms & levels;
	const uint32_t R = immr & levels;
	const int esize = 1 << len;
	// d = diff<len-1:0>: the low `len` bits of S - R, i.e. the same mask as
	// `levels`. Unsigned wrap-around gives the subtract-with-borrow result.
	const uint32_t d = (S - R) & levels;

	// Elements can be up to 64 bits wide, so they must not be narrowed to 32 bits.
	const uint64_t welem = Ones(S + 1);
	const uint64_t telem = Ones(d + 1);

	if (wmask) {
		const uint64_t rotated = ROR(welem, R, esize);
		*wmask = Replicate(rotated, esize);
	}
	if (tmask) {
		*tmask = Replicate(telem, esize);
	}
}
static const char *conds[16] = {
"eq", // Equal
"ne", // Not equal
@ -77,21 +131,33 @@ static void DataProcessingImmediate(uint32_t w, uint64_t addr, Instruction *inst
int opc = (w >> 29) & 3;
int shift = ((w >> 21) & 0x3) * 16;
const char *opnames[4] = { "movn", "(undef)", "movz", "movk" };
snprintf(instr->text, sizeof(instr->text), "%s %c%d, 0x%04x << %d", opnames[opc], r, Rd, imm16, shift);
snprintf(instr->text, sizeof(instr->text), "%s %c%d, #0x%04x << %d", opnames[opc], r, Rd, imm16, shift);
} else if (((w >> 24) & 0x1F) == 0x10) {
// Address generation relative to PC
int op = w >> 31;
int imm = SignExtend19(w >> 5);
if (op & 1) imm <<= 12;
u64 daddr = addr + imm;
snprintf(instr->text, sizeof(instr->text), "%s x%d, 0x%04x%08x", op ? "adrp" : "adr", Rd, daddr >> 32, daddr & 0xFFFFFFFF);
snprintf(instr->text, sizeof(instr->text), "%s x%d, #0x%04x%08x", op ? "adrp" : "adr", Rd, daddr >> 32, daddr & 0xFFFFFFFF);
} else if (((w >> 24) & 0x1F) == 0x11) {
// Add/subtract immediate value
int op = (w >> 30) & 1;
int imm = ((w >> 10) & 0xFFF);
int shift = ((w >> 22) & 0x3) * 16;
imm <<= shift;
snprintf(instr->text, sizeof(instr->text), "%s %c%d, %c%d, %d", op == 0 ? "add" : "sub", r, Rd, r, Rn, imm);
snprintf(instr->text, sizeof(instr->text), "%s %c%d, %c%d, #%d", op == 0 ? "add" : "sub", r, Rd, r, Rn, imm);
} else if (((w >> 23) & 0x3f) == 0x24) {
int immr = (w >> 16) & 0x3f;
int imms = (w >> 10) & 0x3f;
int N = (w >> 22) & 1;
int opc = (w >> 29) & 3;
const char *opname[4] = { "and", "orr", "eor", "ands" };
uint64_t wmask;
DecodeBitMasks(N, imms, immr, NULL, &wmask);
if (((w >> 31) & 1) && wmask & 0xFFFFFFFF00000000ULL)
snprintf(instr->text, sizeof(instr->text), "%s %c%d, %c%d, #0x%x%08x", opname[opc], r, Rd, r, Rn, (wmask >> 32), (wmask & 0xFFFFFFFF));
else
snprintf(instr->text, sizeof(instr->text), "%s %c%d, %c%d, #0x%x", opname[opc], r, Rd, r, Rn, (uint32_t)wmask);
} else {
snprintf(instr->text, sizeof(instr->text), "(DPI %08x)", w);
}
@ -120,7 +186,7 @@ static void BranchExceptionAndSystem(uint32_t w, uint64_t addr, Instruction *ins
int offset = SignExtend19(w >> 5) << 2;
uint64_t target = addr + offset;
int cond = w & 0xF;
snprintf(instr->text, sizeof(instr->text), "b.%s %04x08x", conds[cond], (target >> 32), (target & 0xFFFFFFFF));
snprintf(instr->text, sizeof(instr->text), "b.%s %04x%08x", conds[cond], (target >> 32), (target & 0xFFFFFFFF));
} else if ((w >> 24) == 0xD4) {
snprintf(instr->text, sizeof(instr->text), "(exception-gen %08x)", w);
} else if (((w >> 20) & 0xFFC) == 0xD50) {
@ -212,9 +278,9 @@ static void DataProcessingRegister(uint32_t w, uint64_t addr, Instruction *instr
int N = (w >> 21) & 1;
int opc = (((w >> 29) & 3) << 1) | N;
const char *opnames[8] = { "and", "bic", "orr", "orn", "eor", "eon", "ands", "bics" };
if (opc == 3 && Rn == 31) {
if (opc == 2 && Rn == 31) {
// Special case for MOV (which is constructed from an ORR)
snprintf(instr->text, sizeof(instr->text), "%s %c%d, %c%d", opnames[opc], r, Rd, r, Rm);
snprintf(instr->text, sizeof(instr->text), "mov %c%d, %c%d", r, Rd, r, Rm);
} else if (imm6 == 0) {
snprintf(instr->text, sizeof(instr->text), "%s %c%d, %c%d, %c%d", opnames[opc], r, Rd, r, Rn, r, Rm);
} else {
@ -235,20 +301,22 @@ static void FPandASIMD2(uint32_t w, uint64_t addr, Instruction *instr) {
int Rm = (w >> 16) & 0x1f;
int type = (w >> 22) & 0x3;
if ((w >> 24) == 0x1E) {
if (((w >> 10) & 0x39f) == 0x810) {
if (((w >> 10) & 0xf9f) == 0x810) {
const char *opnames[4] = { "fmov", "fabs", "fneg", "fsqrt" };
int opc = (w >> 15) & 0x3;
snprintf(instr->text, sizeof(instr->text), "%s !%d, !%d (%08x)", opnames[opc], Rd, Rn, w);
snprintf(instr->text, sizeof(instr->text), "%s s%d, s%d", opnames[opc], Rd, Rn); // TODO: Support doubles too
} else if (((w >> 10) & 3) == 2) {
// FP data-proc (2 source)
int opc = (w >> 12) & 0xf;
if (type == 0 || type == 1) {
const char *opnames[9] = { "fmul", "fdiv", "fadd", "fsub", "fmax", "fmin", "fmaxnm", "fminnm", "fnmul" };
char r = 's';
char r = 's'; // TODO: Support doubles too
snprintf(instr->text, sizeof(instr->text), "%s %c%d, %c%d, %c%d", opnames[opc], r, Rd, r, Rn, r, Rm);
} else {
snprintf(instr->text, sizeof(instr->text), "(FP2 %08x)", w);
}
} else {
snprintf(instr->text, sizeof(instr->text), "(FP2 %08x)", w);
}
} else {
snprintf(instr->text, sizeof(instr->text), "(FP2 %08x)", w);