Consider this code snippet:

float t1(int argc) {
  return (argc == 1123) ? 1.234f : 2.38213f;
}

We would generate truly awful code on ARM (those with a weak stomach should look away):

_t1:
        movw    r1, #1123
        movs    r2, #1
        movs    r3, #0
        cmp     r0, r1
        mov.w   r0, #0
        it      eq
        moveq   r0, r2
        movs    r1, #4
        cmp     r0, #0
        it      ne
        movne   r3, r1
        adr     r0, #LCPI1_0
        ldr     r0, [r0, r3]
        bx      lr

The problem was that legalization was creating a cascade of SELECT_CC nodes, one for the comparison of "argc == 1123", which was fed into a SELECT node for the ?: statement, which was itself converted to a SELECT_CC node. This is because the ARM back-end doesn't have custom lowering for SELECT nodes, so it used the default "Expand".

I added a fairly simple "LowerSELECT" to the ARM back-end. It takes care of this testcase, but can obviously be expanded to include more cases.

Now we generate this, which looks optimal to me:

_t1:
        movw    r1, #1123
        movs    r2, #0
        cmp     r0, r1
        adr     r0, #LCPI0_0
        it      eq
        moveq   r2, #4
        ldr     r0, [r0, r2]
        bx      lr
        .align  2
LCPI0_0:
        .long   1075344593      @ float 2.382130e+00
        .long   1067316150      @ float 1.234000e+00

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@110799 91177308-0d34-0410-b5e6-96231b3b80d8
author: Bill Wendling <isanbard@gmail.com> 2010-08-11 08:43:16 +0000
committer: Bill Wendling <isanbard@gmail.com> 2010-08-11 08:43:16 +0000
commit: de2b151dbf125af49717807b9cfc1f6f7a5b9ea6 (patch)
tree: 0966dab4e700d13996e7fdf0a9556afde6483a8f /test
parent: 7b4d31176efe6894bcfaa05257dd5783acda5ddc (diff)
1 files changed, 25 insertions, 0 deletions
diff --git a/test/CodeGen/ARM/select.ll b/test/CodeGen/ARM/select.ll
index 29c55c6bd9..7413bed5c5 100644
--- a/test/CodeGen/ARM/select.ll
+++ b/test/CodeGen/ARM/select.ll
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -march=arm | FileCheck %s
 ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s --check-prefix=CHECK-VFP
+; RUN: llc < %s -mattr=+neon,+thumb2 -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=CHECK-NEON
 
 define i32 @f1(i32 %a.s) {
 ;CHECK: f1:
@@ -65,3 +66,27 @@ define double @f7(double %a, double %b) {
     %tmp1 = select i1 %tmp, double -1.000e+00, double %b
     ret double %tmp1
 }
+
+; <rdar://problem/7260094>
+;
+; We used to generate really horrible code for this function. The main cause was
+; a lack of a custom lowering routine for an ISD::SELECT. This would result in
+; two "it" blocks in the code: one for the "icmp" and another to move the index
+; into the constant pool based on the value of the "icmp". If we have one "it"
+; block generated, odds are good that we have close to the ideal code for this:
+;
+; CHECK-NEON:      _f8:
+; CHECK-NEON:      movw   [[REGISTER_1:r[0-9]+]], #1123
+; CHECK-NEON-NEXT: movs   [[REGISTER_2:r[0-9]+]], #0
+; CHECK-NEON-NEXT: cmp    r0, [[REGISTER_1]]
+; CHECK-NEON-NEXT: adr    [[REGISTER_3:r[0-9]+]], #LCPI
+; CHECK-NEON-NEXT: it     eq
+; CHECK-NEON-NEXT: moveq  [[REGISTER_2]], #4
+; CHECK-NEON-NEXT: ldr
+; CHECK-NEON:      bx
+
+define arm_apcscc float @f8(i32 %a) nounwind {
+  %tmp = icmp eq i32 %a, 1123
+  %tmp1 = select i1 %tmp, float 0x3FF3BE76C0000000, float 0x40030E9A20000000
+  ret float %tmp1
+}
author	Bill Wendling <isanbard@gmail.com>	2010-08-11 08:43:16 +0000
committer	Bill Wendling <isanbard@gmail.com>	2010-08-11 08:43:16 +0000
commit	de2b151dbf125af49717807b9cfc1f6f7a5b9ea6 (patch)
tree	0966dab4e700d13996e7fdf0a9556afde6483a8f /test
parent	7b4d31176efe6894bcfaa05257dd5783acda5ddc (diff)