author     Eli Friedman <eli.friedman@gmail.com>   2010-06-09 02:43:17 +0000
committer  Eli Friedman <eli.friedman@gmail.com>   2010-06-09 02:43:17 +0000
commit     092f9c116abe9fe95da16815f1a0654a24f3723f (patch)
tree       1d7af54530daeaa5068d21a613c971c32733a799 /lib/Target/X86/README-X86-64.txt
parent     d84712421121744797210a7814aafce8c5377d92 (diff)
A few new x86-64 specific README entries.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@105674 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/X86/README-X86-64.txt')
-rw-r--r--  lib/Target/X86/README-X86-64.txt  82
1 file changed, 82 insertions(+), 0 deletions(-)
diff --git a/lib/Target/X86/README-X86-64.txt b/lib/Target/X86/README-X86-64.txt
index 86b16ec406..b7ebc461f3 100644
--- a/lib/Target/X86/README-X86-64.txt
+++ b/lib/Target/X86/README-X86-64.txt
@@ -74,6 +74,15 @@ gcc:
         movq    %rax, (%rdx)
         ret
 
+And the codegen is even worse for the following
+(from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=33103):
+  void fill1(char *s, int a)
+  {
+    __builtin_memset(s, a, 15);
+  }
+
+For this version, we duplicate the computation of the constant to store.
+
 //===---------------------------------------------------------------------===//
 
 It's not possible to reference AH, BH, CH, and DH registers in an instruction
@@ -158,3 +167,76 @@ be able to recognize the zero extend. This could also presumably be implemented
 if we have whole-function selectiondags.
 
 //===---------------------------------------------------------------------===//
+
+Take the following C code
+(from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43640):
+
+struct u1
+{
+  float x;
+  float y;
+};
+
+float foo(struct u1 u)
+{
+  return u.x + u.y;
+}
+
+Optimizes to the following IR:
+define float @foo(double %u.0) nounwind readnone {
+entry:
+  %tmp8 = bitcast double %u.0 to i64              ; <i64> [#uses=2]
+  %tmp6 = trunc i64 %tmp8 to i32                  ; <i32> [#uses=1]
+  %tmp7 = bitcast i32 %tmp6 to float              ; <float> [#uses=1]
+  %tmp2 = lshr i64 %tmp8, 32                      ; <i64> [#uses=1]
+  %tmp3 = trunc i64 %tmp2 to i32                  ; <i32> [#uses=1]
+  %tmp4 = bitcast i32 %tmp3 to float              ; <float> [#uses=1]
+  %0 = fadd float %tmp7, %tmp4                    ; <float> [#uses=1]
+  ret float %0
+}
+
+And current llvm-gcc/clang output:
+        movd    %xmm0, %rax
+        movd    %eax, %xmm1
+        shrq    $32, %rax
+        movd    %eax, %xmm0
+        addss   %xmm1, %xmm0
+        ret
+
+We really shouldn't move the floats to RAX, only to immediately move them
+straight back to the XMM registers.
+
+There really isn't any good way to handle this purely in the IR optimizers; it
+could possibly be handled by changing the output of the front-end, though. It
+would also be feasible to add an x86-specific DAGCombine to optimize the
+bitcast+trunc+(lshr+)bitcast combination.
+
+//===---------------------------------------------------------------------===//
+
+Take the following code
+(from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34653):
+extern unsigned long table[];
+unsigned long foo(unsigned char *p) {
+  unsigned long tag = *p;
+  return table[tag >> 4] + table[tag & 0xf];
+}
+
+Current code generated:
+        movzbl  (%rdi), %eax
+        movq    %rax, %rcx
+        andq    $240, %rcx
+        shrq    %rcx
+        andq    $15, %rax
+        movq    table(,%rax,8), %rax
+        addq    table(%rcx), %rax
+        ret
+
+Issues:
+1. First movq should be movl; saves a byte.
+2. Both andq's should be andl; saves another two bytes. I think this was
+   implemented at one point, but subsequently regressed.
+3. shrq should be shrl; saves another byte.
+4. The first andq can be completely eliminated by using a slightly more
+   expensive addressing mode.
+
+//===---------------------------------------------------------------------===//
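
For the fill1() entry added above, a plausible improved sequence broadcasts
the fill byte once (multiplying by 0x0101010101010101) and covers the 15 bytes
with two overlapping 8-byte stores, so the broadcast constant is computed a
single time. This is a sketch against the SysV x86-64 ABI (s in %rdi, a in
%esi), not the output of any particular compiler revision:

        movzbl  %sil, %eax                  # zero-extend the fill byte (sketch, not compiler output)
        movabsq $0x0101010101010101, %rcx
        imulq   %rcx, %rax                  # broadcast the byte into all 8 lanes, once
        movq    %rax, (%rdi)                # store bytes 0-7
        movq    %rax, 7(%rdi)               # store bytes 7-14; the overlap is harmless
        ret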
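
For the struct u1 entry, the round trip through RAX can be avoided by keeping
both floats in the XMM domain and shuffling the high lane down. A sketch,
assuming SSE3 is available (movshdup is an SSE3 instruction; on plain SSE2 a
pshufd or shufps would serve the same purpose):

        movshdup %xmm0, %xmm1               # copy u.y (bits 32-63) into the low lane (needs SSE3)
        addss    %xmm1, %xmm0               # low lane of %xmm0 = u.x + u.y
        ret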
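
And for the table entry, applying all four listed fixes gives roughly the
sequence below: 32-bit mov/shr/and (points 1-3), plus a scaled-index
addressing mode in place of the first andq (point 4). Again a sketch,
assuming the SysV ABI (p in %rdi, result in %rax):

        movzbl  (%rdi), %eax                # writing %eax zeroes the upper half of %rax
        movl    %eax, %ecx
        shrl    $4, %eax                    # tag >> 4
        andl    $15, %ecx                   # tag & 0xf
        movq    table(,%rax,8), %rax        # scaled index replaces the andq $240/shrq pair
        addq    table(,%rcx,8), %rax
        ret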