From d3669daba461759da8aaebb623e017e88f3eaef9 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Wed, 18 Mar 2015 14:44:01 +0100 Subject: [PATCH] ARM64: Fixes to emitter and disassembly for logical immediates --- Common/Arm64Emitter.cpp | 25 +++++++++--- Common/Arm64Emitter.h | 7 +++- Core/Util/DisArm64.cpp | 86 ++++++++++++++++++++++++++++++++++++----- 3 files changed, 101 insertions(+), 17 deletions(-) diff --git a/Common/Arm64Emitter.cpp b/Common/Arm64Emitter.cpp index 99fec9cfd..0cf4bfed5 100644 --- a/Common/Arm64Emitter.cpp +++ b/Common/Arm64Emitter.cpp @@ -22,7 +22,7 @@ const int kXRegSizeInBits = 64; int CountLeadingZeros(uint64_t value, int width) { // TODO(jbramley): Optimize this for ARM64 hosts. int count = 0; - uint64_t bit_test = 1UL << (width - 1); + uint64_t bit_test = 1ULL << (width - 1); while ((count < width) && ((bit_test & value) == 0)) { count++; bit_test >>= 1; @@ -723,7 +723,7 @@ void ARM64XEmitter::EncodeAddSubImmInst(u32 op, bool flags, u32 shift, u32 imm, (imm << 10) | (Rn << 5) | Rd); } -void ARM64XEmitter::EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert) +void ARM64XEmitter::EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, int n) { // Sometimes Rd is fixed to SP, but can still be 32bit or 64bit. // Use Rn to determine bitness here. @@ -732,7 +732,7 @@ void ARM64XEmitter::EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 i Rd = DecodeReg(Rd); Rn = DecodeReg(Rn); - Write32((b64Bit << 31) | (op << 29) | (0x24 << 23) | (b64Bit << 22) | (invert << 21) | \ + Write32((b64Bit << 31) | (op << 29) | (0x24 << 23) | (n << 22) | \ (immr << 16) | (imms << 10) | (Rn << 5) | Rd); } @@ -1382,7 +1382,11 @@ void ARM64XEmitter::BICS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shif } void ARM64XEmitter::MOV(ARM64Reg Rd, ARM64Reg Rm) { - ORR(Rd, Is64Bit(Rd) ? SP : WSP, Rm, ArithOption(Rm, ST_LSL, 0)); + if (IsGPR(Rd) && IsGPR(Rm)) { + ORR(Rd, Is64Bit(Rd) ? 
SP : WSP, Rm, ArithOption(Rm, ST_LSL, 0)); + } else { + _assert_msg_(JIT, false, "Non-GPRs not supported in MOV"); + } } void ARM64XEmitter::MVN(ARM64Reg Rd, ARM64Reg Rm) { @@ -1797,7 +1801,7 @@ void ARM64XEmitter::ADRP(ARM64Reg Rd, s32 imm) EncodeAddressInst(1, Rd, imm >> 12); } -// Wrapper around MOVZ+MOVK +// Wrapper around MOVZ+MOVK (and later MOVN) void ARM64XEmitter::MOVI2R(ARM64Reg Rd, u64 imm, bool optimize) { unsigned int parts = Is64Bit(Rd) ? 4 : 2; @@ -1816,13 +1820,22 @@ void ARM64XEmitter::MOVI2R(ARM64Reg Rd, u64 imm, bool optimize) if ((Is64Bit(Rd) && imm == std::numeric_limits<u64>::max()) || (!Is64Bit(Rd) && imm == std::numeric_limits<u32>::max())) { - // Max unsigned value + // Max unsigned value (or if signed, -1) // Set to ~ZR ARM64Reg ZR = Is64Bit(Rd) ? SP : WSP; ORN(Rd, ZR, ZR, ArithOption(ZR, ST_LSL, 0)); return; } + // TODO: Make some more systemic use of MOVN, but this will take care of most cases. + // Small negative integer. Use MOVN + if (!Is64Bit(Rd) && (imm | 0xFFFF0000) == imm) { + MOVN(Rd, ~imm, SHIFT_0); + return; + } + + + // XXX: Use MOVN when possible. 
// XXX: Optimize more // XXX: Support rotating immediates to save instructions if (optimize) diff --git a/Common/Arm64Emitter.h b/Common/Arm64Emitter.h index e7576966e..0cf4bfad1 100644 --- a/Common/Arm64Emitter.h +++ b/Common/Arm64Emitter.h @@ -87,6 +87,10 @@ inline bool IsSingle(ARM64Reg reg) { return (reg & 0xC0) == 0x40; } inline bool IsDouble(ARM64Reg reg) { return (reg & 0xC0) == 0x80; } inline bool IsQuad(ARM64Reg reg) { return (reg & 0xC0) == 0xC0; } inline bool IsVector(ARM64Reg reg) { return (reg & 0xC0) != 0; } +inline bool IsGPR(ARM64Reg reg) { return (int)reg < 0x40; } + +int CountLeadingZeros(uint64_t value, int width); + inline ARM64Reg DecodeReg(ARM64Reg reg) { return (ARM64Reg)(reg & 0x1F); } inline ARM64Reg EncodeRegTo64(ARM64Reg reg) { return (ARM64Reg)(reg | 0x20); } inline ARM64Reg EncodeRegToDouble(ARM64Reg reg) { return (ARM64Reg)((reg & ~0xC0) | 0x80); } @@ -349,7 +353,7 @@ private: void EncodeBitfieldMOVInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms); void EncodeLoadStoreRegisterOffset(u32 size, u32 opc, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm); void EncodeAddSubImmInst(u32 op, bool flags, u32 shift, u32 imm, ARM64Reg Rn, ARM64Reg Rd); - void EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert); + void EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, int n); void EncodeLoadStorePair(u32 op, u32 load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm); void EncodeAddressInst(u32 op, ARM64Reg Rd, s32 imm); void EncodeLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm); @@ -822,7 +826,6 @@ public: // vector x indexed element void FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index); - // ABI related void ABI_PushRegisters(BitSet32 registers); void ABI_PopRegisters(BitSet32 registers, BitSet32 ignore_mask = BitSet32(0)); diff --git a/Core/Util/DisArm64.cpp b/Core/Util/DisArm64.cpp index 29183cdbb..1d10f1ff1 100644 --- 
a/Core/Util/DisArm64.cpp +++ b/Core/Util/DisArm64.cpp @@ -49,6 +49,60 @@ int SignExtend12(int x) { return (x & 0x00000800) ? (0xFFFFF000 | x) : (x & 0xFFF); } +int HighestSetBit(int value) { + int highest = 0; + for (int i = 0; i < 32; i++) { + if (value & (1 << i)) + highest = i; + } + return highest; +} + +uint64_t Ones(int len) { + if (len == 0x40) { + return 0xFFFFFFFFFFFFFFFF; + } + return (1ULL << len) - 1; +} + +uint64_t Replicate(uint64_t value, int esize) { + uint64_t out = 0; + value &= Ones(esize); + for (int i = 0; i < 64; i += esize) { + out |= value << i; + } + return out; +} + +uint64_t ROR(uint64_t value, int amount, int esize) { + uint64_t rotated = (value >> amount) | (value << (esize - amount)); + return rotated & Ones(esize); +} + +void DecodeBitMasks(int immN, int imms, int immr, uint64_t *tmask, uint64_t *wmask) { + // Compute log2 of element size + // 2^len must be in range [2, M] + int len = HighestSetBit((immN << 6) | ((~imms) & 0x3f)); + // if len < 1 then ReservedValue(); + // assert M >= (1 << len); + // Determine S, R and S - R parameters + int levels = Ones(len); + uint32_t S = imms & levels; + uint32_t R = immr & levels; + int diff = S - R; // 6-bit subtract with borrow + int esize = 1 << len; + int d = diff & Ones(len - 1); + uint32_t welem = Ones(S + 1); + uint32_t telem = Ones(d + 1); + if (wmask) { + uint64_t rotated = ROR(welem, R, esize); + *wmask = Replicate(rotated, esize); + } + if (tmask) { + *tmask = Replicate(telem, esize); + } +} + static const char *conds[16] = { "eq", // Equal "ne", // Not equal @@ -77,21 +131,33 @@ static void DataProcessingImmediate(uint32_t w, uint64_t addr, Instruction *inst int opc = (w >> 29) & 3; int shift = ((w >> 21) & 0x3) * 16; const char *opnames[4] = { "movn", "(undef)", "movz", "movk" }; - snprintf(instr->text, sizeof(instr->text), "%s %c%d, 0x%04x << %d", opnames[opc], r, Rd, imm16, shift); + snprintf(instr->text, sizeof(instr->text), "%s %c%d, #0x%04x << %d", opnames[opc], r, Rd, imm16, 
shift); } else if (((w >> 24) & 0x1F) == 0x10) { // Address generation relative to PC int op = w >> 31; int imm = SignExtend19(w >> 5); if (op & 1) imm <<= 12; u64 daddr = addr + imm; - snprintf(instr->text, sizeof(instr->text), "%s x%d, 0x%04x%08x", op ? "adrp" : "adr", Rd, daddr >> 32, daddr & 0xFFFFFFFF); + snprintf(instr->text, sizeof(instr->text), "%s x%d, #0x%04x%08x", op ? "adrp" : "adr", Rd, daddr >> 32, daddr & 0xFFFFFFFF); } else if (((w >> 24) & 0x1F) == 0x11) { // Add/subtract immediate value int op = (w >> 30) & 1; int imm = ((w >> 10) & 0xFFF); int shift = ((w >> 22) & 0x3) * 16; imm <<= shift; - snprintf(instr->text, sizeof(instr->text), "%s %c%d, %c%d, %d", op == 0 ? "add" : "sub", r, Rd, r, Rn, imm); + snprintf(instr->text, sizeof(instr->text), "%s %c%d, %c%d, #%d", op == 0 ? "add" : "sub", r, Rd, r, Rn, imm); + } else if (((w >> 23) & 0x3f) == 0x24) { + int immr = (w >> 16) & 0x3f; + int imms = (w >> 10) & 0x3f; + int N = (w >> 22) & 1; + int opc = (w >> 29) & 3; + const char *opname[4] = { "and", "orr", "eor", "ands" }; + uint64_t wmask; + DecodeBitMasks(N, imms, immr, NULL, &wmask); + if (((w >> 31) & 1) && wmask & 0xFFFFFFFF00000000ULL) + snprintf(instr->text, sizeof(instr->text), "%s %c%d, %c%d, #0x%x%08x", opname[opc], r, Rd, r, Rn, (wmask >> 32), (wmask & 0xFFFFFFFF)); + else + snprintf(instr->text, sizeof(instr->text), "%s %c%d, %c%d, #0x%x", opname[opc], r, Rd, r, Rn, (uint32_t)wmask); } else { snprintf(instr->text, sizeof(instr->text), "(DPI %08x)", w); } @@ -120,7 +186,7 @@ static void BranchExceptionAndSystem(uint32_t w, uint64_t addr, Instruction *ins int offset = SignExtend19(w >> 5) << 2; uint64_t target = addr + offset; int cond = w & 0xF; - snprintf(instr->text, sizeof(instr->text), "b.%s %04x08x", conds[cond], (target >> 32), (target & 0xFFFFFFFF)); + snprintf(instr->text, sizeof(instr->text), "b.%s %04x%08x", conds[cond], (target >> 32), (target & 0xFFFFFFFF)); } else if ((w >> 24) == 0xD4) { snprintf(instr->text, 
sizeof(instr->text), "(exception-gen %08x)", w); } else if (((w >> 20) & 0xFFC) == 0xD50) { @@ -212,9 +278,9 @@ static void DataProcessingRegister(uint32_t w, uint64_t addr, Instruction *instr int N = (w >> 21) & 1; int opc = (((w >> 29) & 3) << 1) | N; const char *opnames[8] = { "and", "bic", "orr", "orn", "eor", "eon", "ands", "bics" }; - if (opc == 3 && Rn == 31) { + if (opc == 2 && Rn == 31) { // Special case for MOV (which is constructed from an ORR) - snprintf(instr->text, sizeof(instr->text), "%s %c%d, %c%d", opnames[opc], r, Rd, r, Rm); + snprintf(instr->text, sizeof(instr->text), "mov %c%d, %c%d", r, Rd, r, Rm); } else if (imm6 == 0) { snprintf(instr->text, sizeof(instr->text), "%s %c%d, %c%d, %c%d", opnames[opc], r, Rd, r, Rn, r, Rm); } else { @@ -235,20 +301,22 @@ static void FPandASIMD2(uint32_t w, uint64_t addr, Instruction *instr) { int Rm = (w >> 16) & 0x1f; int type = (w >> 22) & 0x3; if ((w >> 24) == 0x1E) { - if (((w >> 10) & 0x39f) == 0x810) { + if (((w >> 10) & 0xf9f) == 0x810) { const char *opnames[4] = { "fmov", "fabs", "fneg", "fsqrt" }; int opc = (w >> 15) & 0x3; - snprintf(instr->text, sizeof(instr->text), "%s !%d, !%d (%08x)", opnames[opc], Rd, Rn, w); + snprintf(instr->text, sizeof(instr->text), "%s s%d, s%d", opnames[opc], Rd, Rn); // TODO: Support doubles too } else if (((w >> 10) & 3) == 2) { // FP data-proc (2 source) int opc = (w >> 12) & 0xf; if (type == 0 || type == 1) { const char *opnames[9] = { "fmul", "fdiv", "fadd", "fsub", "fmax", "fmin", "fmaxnm", "fminnm", "fnmul" }; - char r = 's'; + char r = 's'; // TODO: Support doubles too snprintf(instr->text, sizeof(instr->text), "%s %c%d, %c%d, %c%d", opnames[opc], r, Rd, r, Rn, r, Rm); } else { snprintf(instr->text, sizeof(instr->text), "(FP2 %08x)", w); } + } else { + snprintf(instr->text, sizeof(instr->text), "(FP2 %08x)", w); } } else { snprintf(instr->text, sizeof(instr->text), "(FP2 %08x)", w);