diff options
author | Preston Gurd <preston.gurd@intel.com> | 2013-03-27 19:14:02 +0000 |
---|---|---|
committer | Preston Gurd <preston.gurd@intel.com> | 2013-03-27 19:14:02 +0000 |
commit | 1edadea42f6f5c393b4fdb9d7ce1cf7eb9c24ab4 (patch) | |
tree | 0703e20d41246fa36a72779d0d1ba5b58b6ee2d7 | |
parent | e915047fed99221afb8c540d8a7e81038a6483f1 (diff) |
On the current Atom processor, the fastest way to perform an indirect call
through a memory operand is to first load the target address from memory into
a register and then call indirectly through that register.
This patch implements this improvement by modifying the X86 SelectionDAG call
lowering so that a callee address which comes from a memory load is forced
into a virtual register.
Patch by Sriram Murali.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@178171 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Target/X86/X86.td | 7 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 13 | ||||
-rw-r--r-- | lib/Target/X86/X86Subtarget.cpp | 1 | ||||
-rw-r--r-- | lib/Target/X86/X86Subtarget.h | 5 | ||||
-rw-r--r-- | test/CodeGen/X86/atom-call-reg-indirect.ll | 45 |
5 files changed, 70 insertions, 1 deletion
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index e87da56cc6..bf095017f8 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -134,6 +134,9 @@ def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb", def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions", "PadShortFunctions", "true", "Pad short functions">; +def FeatureCallRegIndirect : SubtargetFeature<"call-reg-indirect", + "CallRegIndirect", "true", + "Call register indirect">; //===----------------------------------------------------------------------===// // X86 processors supported. @@ -181,7 +184,9 @@ def : ProcessorModel<"penryn", SandyBridgeModel, def : ProcessorModel<"atom", AtomModel, [ProcIntelAtom, FeatureSSSE3, FeatureCMPXCHG16B, FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP, - FeatureSlowDivide, FeaturePadShortFunctions]>; + FeatureSlowDivide, + FeatureCallRegIndirect, + FeaturePadShortFunctions]>; // "Arrandale" along with corei3 and corei5 def : ProcessorModel<"corei7", SandyBridgeModel, diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 9ef6a3bca0..0eaab0f818 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -2629,6 +2629,19 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, InFlag = Chain.getValue(1); } + // Use indirect reference through register, when CALL uses a memory reference. + if (Subtarget->callRegIndirect() && + Callee.getOpcode() == ISD::LOAD) { + const TargetRegisterClass *AddrRegClass = + getRegClassFor(Subtarget->is64Bit() ? 
MVT::i64:MVT::i32); + MachineRegisterInfo &MRI = MF.getRegInfo(); + unsigned VReg = MRI.createVirtualRegister(AddrRegClass); + SDValue tempValue = DAG.getCopyFromReg(Callee, + dl, VReg, Callee.getValueType()); + Chain = DAG.getCopyToReg(Chain, dl, VReg, tempValue, InFlag); + InFlag = Chain.getValue(1); + } + Ops.push_back(Chain); Ops.push_back(Callee); diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 4b368b4901..6e66c1aa96 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -458,6 +458,7 @@ void X86Subtarget::initializeEnvironment() { HasSlowDivide = false; PostRAScheduler = false; PadShortFunctions = false; + CallRegIndirect = false; stackAlignment = 4; // FIXME: this is a known good value for Yonah. How about others? MaxInlineSizeThreshold = 128; diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 108ef0eb1c..cac3f579b0 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -159,6 +159,10 @@ protected: /// a stall when returning too early. bool PadShortFunctions; + /// CallRegIndirect - True if the Calls with memory reference should be converted + /// to a register-based indirect call. + bool CallRegIndirect; + /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. 
unsigned stackAlignment; @@ -269,6 +273,7 @@ public: bool useLeaForSP() const { return UseLeaForSP; } bool hasSlowDivide() const { return HasSlowDivide; } bool padShortFunctions() const { return PadShortFunctions; } + bool callRegIndirect() const { return CallRegIndirect; } bool isAtom() const { return X86ProcFamily == IntelAtom; } diff --git a/test/CodeGen/X86/atom-call-reg-indirect.ll b/test/CodeGen/X86/atom-call-reg-indirect.ll new file mode 100644 index 0000000000..632781130d --- /dev/null +++ b/test/CodeGen/X86/atom-call-reg-indirect.ll @@ -0,0 +1,45 @@ +; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck -check-prefix=ATOM32 %s +; RUN: llc < %s -mcpu=core2 -mtriple=i686-linux | FileCheck -check-prefix=ATOM-NOT32 %s +; RUN: llc < %s -mcpu=atom -mtriple=x86_64-linux | FileCheck -check-prefix=ATOM64 %s +; RUN: llc < %s -mcpu=core2 -mtriple=x86_64-linux | FileCheck -check-prefix=ATOM-NOT64 %s + + +; fn_ptr.ll +%class.A = type { i32 (...)** } + +define i32 @test1() #0 { + ;ATOM: test1 +entry: + %call = tail call %class.A* @_Z3facv() + %0 = bitcast %class.A* %call to void (%class.A*)*** + %vtable = load void (%class.A*)*** %0, align 8 + %1 = load void (%class.A*)** %vtable, align 8 + ;ATOM32: movl (%ecx), %ecx + ;ATOM32: calll *%ecx + ;ATOM-NOT32: calll *(%ecx) + ;ATOM64: movq (%rcx), %rcx + ;ATOM64: callq *%rcx + ;ATOM-NOT64: callq *(%rcx) + tail call void %1(%class.A* %call) + ret i32 0 +} + +declare %class.A* @_Z3facv() #1 + +; virt_fn.ll +@p = external global void (i32)** + +define i32 @test2() #0 { + ;ATOM: test2 +entry: + %0 = load void (i32)*** @p, align 8 + %1 = load void (i32)** %0, align 8 + ;ATOM32: movl (%eax), %eax + ;ATOM32: calll *%eax + ;ATOM-NOT: calll *(%eax) + ;ATOM64: movq (%rax), %rax + ;ATOM64: callq *%rax + ;ATOM-NOT64: callq *(%rax) + tail call void %1(i32 2) + ret i32 0 +} |