author     Eli Friedman <eli.friedman@gmail.com>   2010-06-09 02:43:17 +0000
committer  Eli Friedman <eli.friedman@gmail.com>   2010-06-09 02:43:17 +0000
commit     092f9c116abe9fe95da16815f1a0654a24f3723f (patch)
tree       1d7af54530daeaa5068d21a613c971c32733a799 /lib/Target/X86/README-X86-64.txt
parent     d84712421121744797210a7814aafce8c5377d92 (diff)
A few new x86-64 specific README entries.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@105674 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/X86/README-X86-64.txt')
-rw-r--r--  lib/Target/X86/README-X86-64.txt  82
1 file changed, 82 insertions(+), 0 deletions(-)
diff --git a/lib/Target/X86/README-X86-64.txt b/lib/Target/X86/README-X86-64.txt
index 86b16ec406..b7ebc461f3 100644
--- a/lib/Target/X86/README-X86-64.txt
+++ b/lib/Target/X86/README-X86-64.txt
@@ -74,6 +74,15 @@ gcc:
         movq    %rax, (%rdx)
         ret
 
+And the codegen is even worse for the following
+(from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=33103):
+  void fill1(char *s, int a)
+  {
+    __builtin_memset(s, a, 15);
+  }
+
+For this version, we duplicate the computation of the constant to store.
+
 //===---------------------------------------------------------------------===//
 
 It's not possible to reference AH, BH, CH, and DH registers in an instruction
@@ -158,3 +167,76 @@ be able to recognize the zero extend. This could also presumably be implemented
 if we have whole-function selectiondags.
 
 //===---------------------------------------------------------------------===//
+
+Take the following C code
+(from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43640):
+
+struct u1
+{
+  float x;
+  float y;
+};
+
+float foo(struct u1 u)
+{
+  return u.x + u.y;
+}
+
+Optimizes to the following IR:
+define float @foo(double %u.0) nounwind readnone {
+entry:
+  %tmp8 = bitcast double %u.0 to i64              ; <i64> [#uses=2]
+  %tmp6 = trunc i64 %tmp8 to i32                  ; <i32> [#uses=1]
+  %tmp7 = bitcast i32 %tmp6 to float              ; <float> [#uses=1]
+  %tmp2 = lshr i64 %tmp8, 32                      ; <i64> [#uses=1]
+  %tmp3 = trunc i64 %tmp2 to i32                  ; <i32> [#uses=1]
+  %tmp4 = bitcast i32 %tmp3 to float              ; <float> [#uses=1]
+  %0 = fadd float %tmp7, %tmp4                    ; <float> [#uses=1]
+  ret float %0
+}
+
+And current llvm-gcc/clang output:
+        movd    %xmm0, %rax
+        movd    %eax, %xmm1
+        shrq    $32, %rax
+        movd    %eax, %xmm0
+        addss   %xmm1, %xmm0
+        ret
+
+We really shouldn't move the floats to RAX, only to immediately move them
+straight back to the XMM registers.
+
+There really isn't any good way to handle this purely in the IR optimizers; it
+could possibly be handled by changing the output of the front-end, though. It
+would also be feasible to add an x86-specific DAGCombine to optimize the
+bitcast+trunc+(lshr+)bitcast combination.
+
+//===---------------------------------------------------------------------===//
+
+Take the following code
+(from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34653):
+extern unsigned long table[];
+unsigned long foo(unsigned char *p) {
+  unsigned long tag = *p;
+  return table[tag >> 4] + table[tag & 0xf];
+}
+
+Current code generated:
+        movzbl  (%rdi), %eax
+        movq    %rax, %rcx
+        andq    $240, %rcx
+        shrq    %rcx
+        andq    $15, %rax
+        movq    table(,%rax,8), %rax
+        addq    table(%rcx), %rax
+        ret
+
+Issues:
+1. First movq should be movl; saves a byte.
+2. Both andq's should be andl; saves another two bytes. I think this was
+   implemented at one point, but subsequently regressed.
+3. shrq should be shrl; saves another byte.
+4. The first andq can be completely eliminated by using a slightly more
+   expensive addressing mode.
+
+//===---------------------------------------------------------------------===//
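
For the fill1() entry added above, a plausible improved sequence broadcasts
the fill byte once (multiplying by 0x0101010101010101) and covers the 15 bytes
with two overlapping 8-byte stores, so the broadcast constant is computed a
single time. This is a sketch against the SysV x86-64 ABI (s in %rdi, a in
%esi), not the output of any particular compiler revision:

        movzbl  %sil, %eax                  # zero-extend the fill byte (sketch, not compiler output)
        movabsq $0x0101010101010101, %rcx
        imulq   %rcx, %rax                  # broadcast the byte into all 8 lanes, once
        movq    %rax, (%rdi)                # store bytes 0-7
        movq    %rax, 7(%rdi)               # store bytes 7-14; the overlap is harmless
        ret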
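
For the struct u1 entry, the round trip through RAX can be avoided by keeping
both floats in the XMM domain and shuffling the high lane down. A sketch,
assuming SSE3 is available (movshdup is an SSE3 instruction; on plain SSE2 a
pshufd or shufps would serve the same purpose):

        movshdup %xmm0, %xmm1               # copy u.y (bits 32-63) into the low lane (needs SSE3)
        addss    %xmm1, %xmm0               # low lane of %xmm0 = u.x + u.y
        ret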
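
And for the table entry, applying all four listed fixes gives roughly the
sequence below: 32-bit mov/shr/and (points 1-3), plus a scaled-index
addressing mode in place of the first andq (point 4). Again a sketch,
assuming the SysV ABI (p in %rdi, result in %rax):

        movzbl  (%rdi), %eax                # writing %eax zeroes the upper half of %rax
        movl    %eax, %ecx
        shrl    $4, %eax                    # tag >> 4
        andl    $15, %ecx                   # tag & 0xf
        movq    table(,%rax,8), %rax        # scaled index replaces the andq $240/shrq pair
        addq    table(,%rcx,8), %rax
        ret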