aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChris Lattner <sabre@nondot.org>2009-02-04 19:08:01 +0000
committerChris Lattner <sabre@nondot.org>2009-02-04 19:08:01 +0000
commit8dfdf5d62d881301022279de5778b029a7aee6b1 (patch)
tree21fe36c0c294b10e78708314152bec3f964b87b3
parent6792e90fec54921fc5ec72f876b80edd47636cd0 (diff)
add a note, this is why we're faster at SciMark-MonteCarlo with
SSE disabled. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@63751 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/X86/README-SSE.txt40
1 files changed, 40 insertions, 0 deletions
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index bc51b53482..67cad42a35 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -912,3 +912,43 @@ since we know the stack slot is already zext'd.
Consider using movlps instead of movsd to implement (scalar_to_vector (loadf64))
when code size is critical. movlps is slower than movsd on core2 but it's one
byte shorter.
+
+//===---------------------------------------------------------------------===//
+
+We should use a dynamic programming based approach to tell when using FPStack
+operations is cheaper than SSE. SciMark montecarlo contains code like this
+for example:
+
+double MonteCarlo_num_flops(int Num_samples) {
+ return ((double) Num_samples)* 4.0;
+}
+
+In fpstack mode, this compiles into:
+
+LCPI1_0:
+ .long 1082130432 ## float 4.000000e+00
+_MonteCarlo_num_flops:
+ subl $4, %esp
+ movl 8(%esp), %eax
+ movl %eax, (%esp)
+ fildl (%esp)
+ fmuls LCPI1_0
+ addl $4, %esp
+ ret
+
+in SSE mode, it compiles into significantly slower code:
+
+_MonteCarlo_num_flops:
+ subl $12, %esp
+ cvtsi2sd 16(%esp), %xmm0
+ mulsd LCPI1_0, %xmm0
+ movsd %xmm0, (%esp)
+ fldl (%esp)
+ addl $12, %esp
+ ret
+
+There are also other cases in scimark where using fpstack is better, it is
+cheaper to do fld1 than load from a constant pool for example, so
+"load, add 1.0, store" is better done in the fp stack, etc.
+
+//===---------------------------------------------------------------------===//