aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChris Lattner <sabre@nondot.org>2006-04-21 21:03:21 +0000
committerChris Lattner <sabre@nondot.org>2006-04-21 21:03:21 +0000
commit3e663a6c160eb4b36edfc1a0cbdb9f82d491e034 (patch)
tree63e55d322e4e34800c13f7e1c1b1ef66903b5d7f
parent4a1cd9c61e3e648587d33d5f02ddad97e87f16aa (diff)
add some low-prio notes
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@27934 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/X86/README.txt69
1 files changed, 69 insertions, 0 deletions
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index 6dfa084bc4..a1bb9536b7 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -1054,3 +1054,72 @@ Another example: {0.0, 0.0, a, b }
//===---------------------------------------------------------------------===//
+Consider:
+
+__m128 test(float a) {
+ return _mm_set_ps(0.0, 0.0, 0.0, a*a);
+}
+
+This compiles into:
+
+movss 4(%esp), %xmm1
+mulss %xmm1, %xmm1
+xorps %xmm0, %xmm0
+movss %xmm1, %xmm0
+ret
+
+Because mulss multiplies 0*0 = 0.0, the top elements of xmm1 are already zerod.
+We could compile this to:
+
+movss 4(%esp), %xmm0
+mulss %xmm0, %xmm0
+ret
+
+//===---------------------------------------------------------------------===//
+
+Here's a sick and twisted idea. Consider code like this:
+
+__m128 test(__m128 a) {
+ float b = *(float*)&A;
+ ...
+ return _mm_set_ps(0.0, 0.0, 0.0, b);
+}
+
+This might compile to this code:
+
+movaps c(%esp), %xmm1
+xorps %xmm0, %xmm0
+movss %xmm1, %xmm0
+ret
+
+Now consider if the ... code caused xmm1 to get spilled. This might produce
+this code:
+
+movaps c(%esp), %xmm1
+movaps %xmm1, c2(%esp)
+...
+
+xorps %xmm0, %xmm0
+movaps c2(%esp), %xmm1
+movss %xmm1, %xmm0
+ret
+
+However, since the reload is only used by these instructions, we could
+"fold" it into the uses, producing something like this:
+
+movaps c(%esp), %xmm1
+movaps %xmm1, c2(%esp)
+...
+
+movss c2(%esp), %xmm0
+ret
+
+... saving two instructions.
+
+The basic idea is that a reload from a spill slot, can, if only one 4-byte
+chunk is used, bring in 3 zeros the the one element instead of 4 elements.
+This can be used to simplify a variety of shuffle operations, where the
+elements are fixed zeros.
+
+//===---------------------------------------------------------------------===//
+