Skip to content

Instantly share code, notes, and snippets.

@FioraAeterna
Created December 15, 2014 03:45
Show Gist options
  • Save FioraAeterna/22d813067a01ce396b65 to your computer and use it in GitHub Desktop.
Save FioraAeterna/22d813067a01ce396b65 to your computer and use it in GitHub Desktop.
peephole optimizer
commit 9fb92f192709341a5463a42da190c92425426de9
Author: Fiora <fioraaeterna@gmail.com>
Date: Tue Oct 21 21:03:26 2014 -0700
test
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h
index 4dfbe56..38c2845 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit.h
+++ b/Source/Core/Core/PowerPC/Jit64/Jit.h
@@ -127,6 +127,7 @@ public:
void DoMergedBranch();
void DoMergedBranchCondition();
void DoMergedBranchImmediate(s64 val);
+ bool DoMergedIntegerOp();
// Reads a given bit of a given CR register part.
void GetCRFieldBit(int field, int bit, Gen::X64Reg out, bool negate = false);
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
index 41f1deb..5b20784 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
@@ -9,6 +9,7 @@
#include "Core/PowerPC/Jit64/Jit.h"
#include "Core/PowerPC/Jit64/JitAsm.h"
#include "Core/PowerPC/Jit64/JitRegCache.h"
+#include "Common/GekkoDisassembler.h"
using namespace Gen;
@@ -216,6 +217,10 @@ void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void
{
gpr.SetImmediate32(d, doop((u32)gpr.R(a).offset, value));
}
+ else if (DoMergedIntegerOp())
+ {
+ return;
+ }
else if (a == d)
{
gpr.BindToRegister(d, true);
@@ -257,6 +262,7 @@ void Jit64::reg_imm(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(bJITIntegerOff);
u32 d = inst.RD, a = inst.RA, s = inst.RS;
+
switch (inst.OPCD)
{
case 14: // addi
@@ -315,6 +321,133 @@ void Jit64::reg_imm(UGeckoInstruction inst)
}
}
+// Basic peephole optimization: combine neighboring simple operations using LEA.
+// TODO: come up with an effective PPCAnalyst reordering pass to make this even better.
+//
+// Tries to fuse the current instruction (js.op) with the following one (js.next_op) into a
+// single x86 ADD / SHL / MOV / LEA. The fused result is modelled as
+//
+//     dst = (regs[0] << scale) + offset                  when reg_count == 1
+//     dst = regs[1] + (regs[0] << scale) + offset        when reg_count == 2
+//
+// which is the address form an LEA can encode, so the accumulated scale must stay in 0..3.
+//
+// Instructions accepted in either position:
+//   - rlwinm (OPCD 21) used as a plain left shift by 1..3 with Rc clear
+//   - addi   (OPCD 14)
+//   - addis  (OPCD 15)
+//   - addx   (OPCD 31, SUBOP10 266) with Rc clear
+//
+// Returns true if code for BOTH instructions was emitted; js.skipnext is then set so the
+// following instruction is not compiled again. Returns false, having emitted nothing and
+// modified no JIT state, when the pair cannot be merged.
+//
+// NOTE(review): nothing here verifies that js.next_op / js.next_inst describe an instruction
+// that may legally be merged (same block, no breakpoint, merging enabled in the analyzer).
+// Confirm the callers or the analyzer guarantee that.
+bool Jit64::DoMergedIntegerOp()
+{
+    UGeckoInstruction inst[2] = {js.op->inst, js.next_inst};
+
+    // Debug aid: uncomment to log every merge attempt.
+    //std::string ppc_inst1 = GekkoDisassembler::Disassemble(inst[0].hex, 0);
+    //std::string ppc_inst2 = GekkoDisassembler::Disassemble(inst[1].hex, 0);
+    //ERROR_LOG(COMMON, "Trying to merge %s and %s at %x", ppc_inst1.c_str(), ppc_inst2.c_str(), js.op->address);
+
+    // We only support this merger if the intermediate value doesn't get stored in a
+    // separate output, i.e. "b = a << 2; c = b + d;" can't merge unless b == c.
+    if (js.op->regsOut[0] != js.next_op->regsOut[0])
+        return false;
+
+    // At least one of the inputs of the second op has to be the output of the first op.
+    if (js.next_op->regsIn[0] != js.op->regsOut[0] && js.next_op->regsIn[1] != js.op->regsOut[0])
+        return false;
+
+    int reg_count = 1;
+    int regs[2];
+    int scale = 0;
+    // Accumulated in unsigned arithmetic: guest adds wrap modulo 2^32, and shifting a
+    // negative signed value left would be undefined behaviour.
+    u32 offset = 0;
+    int dst = js.next_op->regsOut[0];
+    regs[0] = regs[1] = js.op->regsIn[0];
+    for (int i = 0; i < 2; i++)
+    {
+        // rlwinm
+        if (inst[i].OPCD == 21)
+        {
+            // Can't left-shift if we already have two register inputs:
+            // the LEA scale only applies to regs[0], not to the sum.
+            if (reg_count > 1)
+                return false;
+            if (inst[i].Rc)
+                return false;
+            // Only supported if it's a left shift that fits in an LEA
+            bool left_shift = inst[i].SH && inst[i].MB == 0 && inst[i].ME == 31 - inst[i].SH;
+            if (!left_shift || inst[i].SH > 3)
+                return false;
+            scale += inst[i].SH;
+            // Anything added before the shift gets shifted along with the register.
+            offset <<= inst[i].SH;
+        }
+        // addi
+        else if (inst[i].OPCD == 14)
+        {
+            offset += (u32)(s32)inst[i].SIMM_16;
+        }
+        // addis
+        else if (inst[i].OPCD == 15)
+        {
+            offset += (u32)inst[i].SIMM_16 << 16;
+        }
+        // addx
+        else if (inst[i].OPCD == 31 && inst[i].SUBOP10 == 266)
+        {
+            if (inst[i].Rc)
+                return false;
+            // d = a + a is just a shift
+            if (inst[i].RA == inst[i].RB)
+            {
+                // Same restriction as rlwinm: doubling a two-register sum cannot be
+                // expressed, because only regs[0] would be scaled.
+                if (reg_count > 1)
+                    return false;
+                scale++;
+                offset <<= 1;
+            }
+            else
+            {
+                // The "other" operand: for the first op, whichever isn't regs[0];
+                // for the second op, whichever isn't the intermediate result (dst).
+                int src = inst[i].RA == (i ? dst : regs[0]) ? inst[i].RB : inst[i].RA;
+                if (gpr.R(src).IsImm())
+                {
+                    // A known-constant register folds into the displacement.
+                    offset += (u32)gpr.R(src).offset;
+                }
+                else if (reg_count > 1)
+                {
+                    // An LEA has room for two registers at most.
+                    return false;
+                }
+                else
+                {
+                    regs[reg_count] = src;
+                    reg_count++;
+                }
+            }
+        }
+        else
+        {
+            // Not a supported instruction to merge
+            return false;
+        }
+    }
+    // Two individually valid shifts can still add up to more than an LEA can scale.
+    if (scale > 3)
+        return false;
+
+    //ERROR_LOG(COMMON, "Success at %x", js.op->address);
+
+    // Commit: from here on code is emitted for both instructions.
+    js.skipnext = true;
+    js.downcountAmount++;
+
+    gpr.Lock(dst, regs[0], regs[1]);
+    // dst must keep its current guest value whenever it is also one of the inputs. That
+    // includes regs[1] (e.g. a first op of the form "add rD, rA, rD"), not only regs[0].
+    gpr.BindToRegister(dst, dst == regs[0] || dst == regs[1]);
+    if (reg_count == 1)
+    {
+        if (dst == regs[0] && !scale)
+        {
+            // In-place add of the combined immediate.
+            ADD(32, gpr.R(dst), Imm32(offset));
+        }
+        else if (dst == regs[0] && scale && !offset)
+        {
+            // In-place pure shift.
+            SHL(32, gpr.R(dst), Imm8(scale));
+        }
+        else if (!offset && !scale)
+        {
+            // The two ops cancelled out to a plain register copy.
+            MOV(32, gpr.R(dst), gpr.R(regs[0]));
+        }
+        else
+        {
+            gpr.BindToRegister(regs[0], true, false);
+            LEA(32, gpr.RX(dst), MScaled(gpr.RX(regs[0]), SCALE_1 << scale, (s32)offset));
+        }
+    }
+    else
+    {
+        gpr.BindToRegister(regs[0], true, false);
+        gpr.BindToRegister(regs[1], true, false);
+        // regs[1] is the unscaled base, regs[0] the scaled index.
+        LEA(32, gpr.RX(dst), MComplex(gpr.RX(regs[1]), gpr.RX(regs[0]), SCALE_1 << scale, (s32)offset));
+    }
+    gpr.UnlockAll();
+    return true;
+}
+
bool Jit64::CheckMergedBranch(int crf)
{
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_BRANCH_MERGE))
@@ -1247,6 +1380,10 @@ void Jit64::addx(UGeckoInstruction inst)
if (inst.OE)
GenerateConstantOverflow((s64)i + (s64)j);
}
+ else if (DoMergedIntegerOp())
+ {
+ return;
+ }
else if ((d == a) || (d == b))
{
int operand = ((d == a) ? b : a);
@@ -1398,6 +1535,10 @@ void Jit64::rlwinmx(UGeckoInstruction inst)
if (inst.Rc)
ComputeRC(gpr.R(a));
}
+ else if (DoMergedIntegerOp())
+ {
+ return;
+ }
else
{
bool left_shift = inst.SH && inst.MB == 0 && inst.ME == 31 - inst.SH;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment