FioraAeterna · December 15, 2014 03:45
diff --git a/gistfile1.diff b/gistfile1.diff
 commit 9fb92f192709341a5463a42da190c92425426de9
 Author: Fiora <fioraaeterna@gmail.com>
 Date:   Tue Oct 21 21:03:26 2014 -0700

    test

 diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h
 index 4dfbe56..38c2845 100644
 --- a/Source/Core/Core/PowerPC/Jit64/Jit.h
 +++ b/Source/Core/Core/PowerPC/Jit64/Jit.h
 @@ -127,6 +127,7 @@ public:
 	void DoMergedBranch();
 	void DoMergedBranchCondition();
 	void DoMergedBranchImmediate(s64 val);
 +	bool DoMergedIntegerOp();
 
 	// Reads a given bit of a given CR register part.
 	void GetCRFieldBit(int field, int bit, Gen::X64Reg out, bool negate = false);
 diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
 index 41f1deb..5b20784 100644
 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
 +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
 @@ -9,6 +9,7 @@
 #include "Core/PowerPC/Jit64/Jit.h"
 #include "Core/PowerPC/Jit64/JitAsm.h"
 #include "Core/PowerPC/Jit64/JitRegCache.h"
 +#include "Common/GekkoDisassembler.h"
 
 using namespace Gen;
 
 @@ -216,6 +217,10 @@ void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void
 		{
 			gpr.SetImmediate32(d, doop((u32)gpr.R(a).offset, value));
 		}
 +		else if (DoMergedIntegerOp())
 +		{
 +			return;
 +		}
 		else if (a == d)
 		{
 			gpr.BindToRegister(d, true);
 @@ -257,6 +262,7 @@ void Jit64::reg_imm(UGeckoInstruction inst)
 	INSTRUCTION_START
 	JITDISABLE(bJITIntegerOff);
 	u32 d = inst.RD, a = inst.RA, s = inst.RS;
 +
 	switch (inst.OPCD)
 	{
 	case 14: // addi
 @@ -315,6 +321,133 @@ void Jit64::reg_imm(UGeckoInstruction inst)
 	}
 }
 
 +// Basic peephole optimization: fuse this integer op and the next one into a single
 +// ADD/SHL/MOV/LEA computing dst = (regs[0] << scale) + regs[1] (if any) + offset.
 +// Returns true when code for both ops was emitted (the next op is then skipped).
 +// TODO: come up with an effective PPCAnalyst reordering pass to make this even better.
 +bool Jit64::DoMergedIntegerOp()
 +{
 +	UGeckoInstruction inst[2] = {js.op->inst, js.next_inst};
 +
 +	// We only support this merger if the intermediate value doesn't get stored in a
 +	// separate output, i.e. "b = a << 2; c = b + d;" can't merge unless b == c.
 +	if (js.op->regsOut[0] != js.next_op->regsOut[0])
 +		return false;
 +
 +	// At least one of the inputs of the second op has to be the output of the first op.
 +	if (js.next_op->regsIn[0] != js.op->regsOut[0] && js.next_op->regsIn[1] != js.op->regsOut[0])
 +		return false;
 +
 +	int reg_count = 1;
 +	int regs[2];
 +	int scale = 0;
 +	// Unsigned so the shifts and adds below wrap mod 2^32 instead of overflowing.
 +	u32 offset = 0;
 +	int dst = js.next_op->regsOut[0];
 +	regs[0] = regs[1] = js.op->regsIn[0];
 +	for (int i = 0; i < 2; i++)
 +	{
 +		// rlwinm
 +		if (inst[i].OPCD == 21)
 +		{
 +			// Can't left-shift if we already have two register inputs
 +			if (reg_count > 1)
 +				return false;
 +			if (inst[i].Rc)
 +				return false;
 +			// Only supported if it's a left shift that fits in an LEA
 +			bool left_shift = inst[i].SH && inst[i].MB == 0 && inst[i].ME == 31 - inst[i].SH;
 +			if (!left_shift || inst[i].SH > 3)
 +				return false;
 +			scale += inst[i].SH;
 +			offset <<= inst[i].SH;
 +		}
 +		// addi
 +		else if (inst[i].OPCD == 14)
 +		{
 +			offset += (u32)(s32)inst[i].SIMM_16;
 +		}
 +		// addis
 +		else if (inst[i].OPCD == 15)
 +		{
 +			offset += (u32)inst[i].SIMM_16 << 16;
 +		}
 +		// addx
 +		else if (inst[i].OPCD == 31 && inst[i].SUBOP10 == 266)
 +		{
 +			if (inst[i].Rc)
 +				return false;
 +			// d = a + a is just a shift, which (like rlwinm) can't follow a two-register add
 +			if (inst[i].RA == inst[i].RB)
 +			{
 +				if (reg_count > 1)
 +					return false;
 +				scale++;
 +				offset <<= 1;
 +			}
 +			else
 +			{
 +				int src = inst[i].RA == (i ? dst : regs[0]) ? inst[i].RB : inst[i].RA;
 +				if (gpr.R(src).IsImm())
 +				{
 +					offset += (u32)gpr.R(src).offset;
 +				}
 +				else if (reg_count > 1)
 +				{
 +					return false;
 +				}
 +				else
 +				{
 +					regs[reg_count] = src;
 +					reg_count++;
 +				}
 +			}
 +		}
 +		else
 +		{
 +			// Not a supported instruction to merge
 +			return false;
 +		}
 +	}
 +	if (scale > 3)
 +		return false;
 +
 +	js.skipnext = true;
 +	js.downcountAmount++;
 +
 +	gpr.Lock(dst, regs[0], regs[1]);
 +	// dst's old value has to be loaded whenever dst is also one of the two inputs.
 +	gpr.BindToRegister(dst, dst == regs[0] || dst == regs[1]);
 +	if (reg_count == 1)
 +	{
 +		if (dst == regs[0] && !scale)
 +		{
 +			ADD(32, gpr.R(dst), Imm32(offset));
 +		}
 +		else if (dst == regs[0] && scale && !offset)
 +		{
 +			SHL(32, gpr.R(dst), Imm8(scale));
 +		}
 +		else if (!offset && !scale)
 +		{
 +			MOV(32, gpr.R(dst), gpr.R(regs[0]));
 +		}
 +		else
 +		{
 +			gpr.BindToRegister(regs[0], true, false);
 +			LEA(32, gpr.RX(dst), MScaled(gpr.RX(regs[0]), SCALE_1 << scale, (s32)offset));
 +		}
 +	}
 +	else
 +	{
 +		gpr.BindToRegister(regs[0], true, false);
 +		gpr.BindToRegister(regs[1], true, false);
 +		LEA(32, gpr.RX(dst), MComplex(gpr.RX(regs[1]), gpr.RX(regs[0]), SCALE_1 << scale, (s32)offset));
 +	}
 +	gpr.UnlockAll();
 +	return true;
 +}
 +
 bool Jit64::CheckMergedBranch(int crf)
 {
 	if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_BRANCH_MERGE))
 @@ -1247,6 +1380,10 @@ void Jit64::addx(UGeckoInstruction inst)
 		if (inst.OE)
 			GenerateConstantOverflow((s64)i + (s64)j);
 	}
 +	else if (DoMergedIntegerOp())
 +	{
 +		return;
 +	}
 	else if ((d == a) || (d == b))
 	{
 		int operand = ((d == a) ? b : a);
 @@ -1398,6 +1535,10 @@ void Jit64::rlwinmx(UGeckoInstruction inst)
 		if (inst.Rc)
 			ComputeRC(gpr.R(a));
 	}
 +	else if (DoMergedIntegerOp())
 +	{
 +		return;
 +	}
 	else
 	{
 		bool left_shift = inst.SH && inst.MB == 0 && inst.ME == 31 - inst.SH;
	commit 9fb92f192709341a5463a42da190c92425426de9
	Author: Fiora <fioraaeterna@gmail.com>
	Date: Tue Oct 21 21:03:26 2014 -0700

	test

	diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h
	index 4dfbe56..38c2845 100644
	--- a/Source/Core/Core/PowerPC/Jit64/Jit.h
	+++ b/Source/Core/Core/PowerPC/Jit64/Jit.h
	@@ -127,6 +127,7 @@ public:
	void DoMergedBranch();
	void DoMergedBranchCondition();
	void DoMergedBranchImmediate(s64 val);
	+ bool DoMergedIntegerOp();

	// Reads a given bit of a given CR register part.
	void GetCRFieldBit(int field, int bit, Gen::X64Reg out, bool negate = false);
	diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
	index 41f1deb..5b20784 100644
	--- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
	+++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
	@@ -9,6 +9,7 @@
	#include "Core/PowerPC/Jit64/Jit.h"
	#include "Core/PowerPC/Jit64/JitAsm.h"
	#include "Core/PowerPC/Jit64/JitRegCache.h"
	+#include "Common/GekkoDisassembler.h"

	using namespace Gen;

	@@ -216,6 +217,10 @@ void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void
	{
	gpr.SetImmediate32(d, doop((u32)gpr.R(a).offset, value));
	}
	+ else if (DoMergedIntegerOp())
	+ {
	+ return;
	+ }
	else if (a == d)
	{
	gpr.BindToRegister(d, true);
	@@ -257,6 +262,7 @@ void Jit64::reg_imm(UGeckoInstruction inst)
	INSTRUCTION_START
	JITDISABLE(bJITIntegerOff);
	u32 d = inst.RD, a = inst.RA, s = inst.RS;
	+
	switch (inst.OPCD)
	{
	case 14: // addi
	@@ -315,6 +321,133 @@ void Jit64::reg_imm(UGeckoInstruction inst)
	}
	}

	+// Basic peephole optimization: fuse this integer op and the next one into a single
	+// ADD/SHL/MOV/LEA computing dst = (regs[0] << scale) + regs[1] (if any) + offset.
	+// Returns true when code for both ops was emitted (the next op is then skipped).
	+// TODO: come up with an effective PPCAnalyst reordering pass to make this even better.
	+bool Jit64::DoMergedIntegerOp()
	+{
	+	UGeckoInstruction inst[2] = {js.op->inst, js.next_inst};
	+
	+	// We only support this merger if the intermediate value doesn't get stored in a
	+	// separate output, i.e. "b = a << 2; c = b + d;" can't merge unless b == c.
	+	if (js.op->regsOut[0] != js.next_op->regsOut[0])
	+		return false;
	+
	+	// At least one of the inputs of the second op has to be the output of the first op.
	+	if (js.next_op->regsIn[0] != js.op->regsOut[0] && js.next_op->regsIn[1] != js.op->regsOut[0])
	+		return false;
	+
	+	int reg_count = 1;
	+	int regs[2];
	+	int scale = 0;
	+	// Unsigned so the shifts and adds below wrap mod 2^32 instead of overflowing.
	+	u32 offset = 0;
	+	int dst = js.next_op->regsOut[0];
	+	regs[0] = regs[1] = js.op->regsIn[0];
	+	for (int i = 0; i < 2; i++)
	+	{
	+		// rlwinm
	+		if (inst[i].OPCD == 21)
	+		{
	+			// Can't left-shift if we already have two register inputs
	+			if (reg_count > 1)
	+				return false;
	+			if (inst[i].Rc)
	+				return false;
	+			// Only supported if it's a left shift that fits in an LEA
	+			bool left_shift = inst[i].SH && inst[i].MB == 0 && inst[i].ME == 31 - inst[i].SH;
	+			if (!left_shift || inst[i].SH > 3)
	+				return false;
	+			scale += inst[i].SH;
	+			offset <<= inst[i].SH;
	+		}
	+		// addi
	+		else if (inst[i].OPCD == 14)
	+		{
	+			offset += (u32)(s32)inst[i].SIMM_16;
	+		}
	+		// addis
	+		else if (inst[i].OPCD == 15)
	+		{
	+			offset += (u32)inst[i].SIMM_16 << 16;
	+		}
	+		// addx
	+		else if (inst[i].OPCD == 31 && inst[i].SUBOP10 == 266)
	+		{
	+			if (inst[i].Rc)
	+				return false;
	+			// d = a + a is just a shift, which (like rlwinm) can't follow a two-register add
	+			if (inst[i].RA == inst[i].RB)
	+			{
	+				if (reg_count > 1)
	+					return false;
	+				scale++;
	+				offset <<= 1;
	+			}
	+			else
	+			{
	+				int src = inst[i].RA == (i ? dst : regs[0]) ? inst[i].RB : inst[i].RA;
	+				if (gpr.R(src).IsImm())
	+				{
	+					offset += (u32)gpr.R(src).offset;
	+				}
	+				else if (reg_count > 1)
	+				{
	+					return false;
	+				}
	+				else
	+				{
	+					regs[reg_count] = src;
	+					reg_count++;
	+				}
	+			}
	+		}
	+		else
	+		{
	+			// Not a supported instruction to merge
	+			return false;
	+		}
	+	}
	+	if (scale > 3)
	+		return false;
	+
	+	js.skipnext = true;
	+	js.downcountAmount++;
	+
	+	gpr.Lock(dst, regs[0], regs[1]);
	+	// dst's old value has to be loaded whenever dst is also one of the two inputs.
	+	gpr.BindToRegister(dst, dst == regs[0] || dst == regs[1]);
	+	if (reg_count == 1)
	+	{
	+		if (dst == regs[0] && !scale)
	+		{
	+			ADD(32, gpr.R(dst), Imm32(offset));
	+		}
	+		else if (dst == regs[0] && scale && !offset)
	+		{
	+			SHL(32, gpr.R(dst), Imm8(scale));
	+		}
	+		else if (!offset && !scale)
	+		{
	+			MOV(32, gpr.R(dst), gpr.R(regs[0]));
	+		}
	+		else
	+		{
	+			gpr.BindToRegister(regs[0], true, false);
	+			LEA(32, gpr.RX(dst), MScaled(gpr.RX(regs[0]), SCALE_1 << scale, (s32)offset));
	+		}
	+	}
	+	else
	+	{
	+		gpr.BindToRegister(regs[0], true, false);
	+		gpr.BindToRegister(regs[1], true, false);
	+		LEA(32, gpr.RX(dst), MComplex(gpr.RX(regs[1]), gpr.RX(regs[0]), SCALE_1 << scale, (s32)offset));
	+	}
	+	gpr.UnlockAll();
	+	return true;
	+}
	+
	bool Jit64::CheckMergedBranch(int crf)
	{
	if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_BRANCH_MERGE))
	@@ -1247,6 +1380,10 @@ void Jit64::addx(UGeckoInstruction inst)
	if (inst.OE)
	GenerateConstantOverflow((s64)i + (s64)j);
	}
	+ else if (DoMergedIntegerOp())
	+ {
	+ return;
	+ }
	else if ((d == a) || (d == b))
	{
	int operand = ((d == a) ? b : a);
	@@ -1398,6 +1535,10 @@ void Jit64::rlwinmx(UGeckoInstruction inst)
	if (inst.Rc)
	ComputeRC(gpr.R(a));
	}
	+ else if (DoMergedIntegerOp())
	+ {
	+ return;
	+ }
	else
	{
	bool left_shift = inst.SH && inst.MB == 0 && inst.ME == 31 - inst.SH;