diff options
author | Nate Begeman <natebegeman@mac.com> | 2005-07-06 18:59:04 +0000 |
---|---|---|
committer | Nate Begeman <natebegeman@mac.com> | 2005-07-06 18:59:04 +0000 |
commit | f63be7d3959939b2ffaf0bba5519b71216ec9ee6 (patch) | |
tree | d166173a3bc9390065664b66967fce46dbf794e6 /lib/Target/X86/X86TargetMachine.cpp | |
parent | e0fe225e8912b6308e3e1db442ba7b96d9f09ff3 (diff) |
First round of support for doing scalar FP using the SSE2 ISA extension and
XMM registers. There are many known deficiencies and fixmes, which will be
addressed ASAP. The major benefit of this work is that it will allow the
LLVM register allocator to allocate FP registers across basic blocks.
The x86 backend will still default to x87 style FP. To enable this work,
you must pass -enable-sse-scalar-fp and either -sse2 or -sse3 to llc.
As a before-and-after example, consider the following function:
double foo(double *P) {
  double Sum = 0;
  int i;
  for (i = 0; i < 1000; ++i)
    Sum += P[i];
  return Sum;
}
The inner loop looks like the following:
x87:
.LBB_foo_1: # no_exit
fldl (%esp)
faddl (%eax,%ecx,8)
fstpl (%esp)
incl %ecx
cmpl $1000, %ecx
#FP_REG_KILL
jne .LBB_foo_1 # no_exit
SSE2:
addsd (%eax,%ecx,8), %xmm0
incl %ecx
cmpl $1000, %ecx
#FP_REG_KILL
jne .LBB_foo_1 # no_exit
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@22340 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/X86/X86TargetMachine.cpp')
-rw-r--r-- | lib/Target/X86/X86TargetMachine.cpp | 9 |
1 file changed, 7 insertions, 2 deletions
```diff
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 2330182372..def4f9cfa4 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -26,6 +26,7 @@
 using namespace llvm;
 
 X86VectorEnum llvm::X86Vector = NoSSE;
+bool llvm::X86ScalarSSE = false;
 
 /// X86TargetMachineModule - Note that this is used on hosts that cannot link
 /// in a library unless there are references into the library. In particular,
@@ -41,8 +42,11 @@ namespace {
   cl::opt<bool> DisableOutput("disable-x86-llc-output", cl::Hidden,
                         cl::desc("Disable the X86 asm printer, for use "
                                  "when profiling the code generator."));
+  cl::opt<bool, true> EnableSSEFP("enable-sse-scalar-fp",
+                cl::desc("Perform FP math in SSE regs instead of the FP stack"),
+                cl::location(X86ScalarSSE),
+                cl::init(false));
 
-#if 0
   // FIXME: This should eventually be handled with target triples and
   // subtarget support!
   cl::opt<X86VectorEnum, true>
@@ -54,7 +58,6 @@ namespace {
        clEnumValN(SSE3, "sse3", " Enable SSE, SSE2, and SSE3 support"),
        clEnumValEnd),
     cl::location(X86Vector), cl::init(NoSSE));
-#endif
 
   // Register the target.
   RegisterTarget<X86TargetMachine> X("x86", " IA-32 (Pentium and above)");
@@ -91,6 +94,8 @@ X86TargetMachine::X86TargetMachine(const Module &M, IntrinsicLowering *IL)
   : TargetMachine("X86", IL, true, 4, 4, 4, 4, 4),
     FrameInfo(TargetFrameInfo::StackGrowsDown, 8, -4),
     JITInfo(*this) {
+  // Scalar SSE FP requires at least SSE2
+  X86ScalarSSE &= X86Vector >= SSE2;
 }
```