6 files changed, 347 insertions, 0 deletions
diff --git a/test/Analysis/CostModel/ARM/cast.ll b/test/Analysis/CostModel/ARM/cast.ll
new file mode 100644
index 0000000000..464b6ece0e
--- /dev/null
+++ b/test/Analysis/CostModel/ARM/cast.ll
@@ -0,0 +1,158 @@
+; RUN: opt < %s  -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios6.0.0"
+
+define i32 @casts() {
+  ; Expected cost-model results for scalar cast instructions on undef operands.
+  ; -- scalar integer extensions and truncations --
+  ; CHECK: cost of 1 {{.*}} sext
+  %r0 = sext i1 undef to i8
+  ; CHECK: cost of 1 {{.*}} zext
+  %r1 = zext i1 undef to i8
+  ; CHECK: cost of 1 {{.*}} sext
+  %r2 = sext i1 undef to i16
+  ; CHECK: cost of 1 {{.*}} zext
+  %r3 = zext i1 undef to i16
+  ; CHECK: cost of 1 {{.*}} sext
+  %r4 = sext i1 undef to i32
+  ; CHECK: cost of 1 {{.*}} zext
+  %r5 = zext i1 undef to i32
+  ; CHECK: cost of 1 {{.*}} sext
+  %r6 = sext i1 undef to i64
+  ; CHECK: cost of 1 {{.*}} zext
+  %r7 = zext i1 undef to i64
+  ; CHECK: cost of 0 {{.*}} trunc
+  %r8 = trunc i8 undef to i1
+  ; CHECK: cost of 1 {{.*}} sext
+  %r9 = sext i8 undef to i16
+  ; CHECK: cost of 1 {{.*}} zext
+  %r10 = zext i8 undef to i16
+  ; CHECK: cost of 1 {{.*}} sext
+  %r11 = sext i8 undef to i32
+  ; CHECK: cost of 1 {{.*}} zext
+  %r12 = zext i8 undef to i32
+  ; CHECK: cost of 1 {{.*}} sext
+  %r13 = sext i8 undef to i64
+  ; CHECK: cost of 1 {{.*}} zext
+  %r14 = zext i8 undef to i64
+  ; CHECK: cost of 0 {{.*}} trunc
+  %r15 = trunc i16 undef to i1
+  ; CHECK: cost of 0 {{.*}} trunc
+  %r16 = trunc i16 undef to i8
+  ; CHECK: cost of 1 {{.*}} sext
+  %r17 = sext i16 undef to i32
+  ; CHECK: cost of 1 {{.*}} zext
+  %r18 = zext i16 undef to i32
+  ; CHECK: cost of 2 {{.*}} sext
+  %r19 = sext i16 undef to i64
+  ; CHECK: cost of 1 {{.*}} zext
+  %r20 = zext i16 undef to i64
+  ; CHECK: cost of 0 {{.*}} trunc
+  %r21 = trunc i32 undef to i1
+  ; CHECK: cost of 0 {{.*}} trunc
+  %r22 = trunc i32 undef to i8
+  ; CHECK: cost of 0 {{.*}} trunc
+  %r23 = trunc i32 undef to i16
+  ; CHECK: cost of 1 {{.*}} sext
+  %r24 = sext i32 undef to i64
+  ; CHECK: cost of 1 {{.*}} zext
+  %r25 = zext i32 undef to i64
+  ; CHECK: cost of 0 {{.*}} trunc
+  %r26 = trunc i64 undef to i1
+  ; CHECK: cost of 0 {{.*}} trunc
+  %r27 = trunc i64 undef to i8
+  ; CHECK: cost of 0 {{.*}} trunc
+  %r28 = trunc i64 undef to i16
+  ; CHECK: cost of 0 {{.*}} trunc
+  %r29 = trunc i64 undef to i32
+
+  ; -- floating point <-> integer conversions --
+  ; Moves between scalar and NEON registers (cost 2); conversions to i64 cost 10.
+  ; CHECK: cost of 2 {{.*}} fptoui
+  %r30 = fptoui float undef to i1
+  ; CHECK: cost of 2 {{.*}} fptosi
+  %r31 = fptosi float undef to i1
+  ; CHECK: cost of 2 {{.*}} fptoui
+  %r32 = fptoui float undef to i8
+  ; CHECK: cost of 2 {{.*}} fptosi
+  %r33 = fptosi float undef to i8
+  ; CHECK: cost of 2 {{.*}} fptoui
+  %r34 = fptoui float undef to i16
+  ; CHECK: cost of 2 {{.*}} fptosi
+  %r35 = fptosi float undef to i16
+  ; CHECK: cost of 2 {{.*}} fptoui
+  %r36 = fptoui float undef to i32
+  ; CHECK: cost of 2 {{.*}} fptosi
+  %r37 = fptosi float undef to i32
+  ; CHECK: cost of 10 {{.*}} fptoui
+  %r38 = fptoui float undef to i64
+  ; CHECK: cost of 10 {{.*}} fptosi
+  %r39 = fptosi float undef to i64
+  ; CHECK: cost of 2 {{.*}} fptoui
+  %r40 = fptoui double undef to i1
+  ; CHECK: cost of 2 {{.*}} fptosi
+  %r41 = fptosi double undef to i1
+  ; CHECK: cost of 2 {{.*}} fptoui
+  %r42 = fptoui double undef to i8
+  ; CHECK: cost of 2 {{.*}} fptosi
+  %r43 = fptosi double undef to i8
+  ; CHECK: cost of 2 {{.*}} fptoui
+  %r44 = fptoui double undef to i16
+  ; CHECK: cost of 2 {{.*}} fptosi
+  %r45 = fptosi double undef to i16
+  ; CHECK: cost of 2 {{.*}} fptoui
+  %r46 = fptoui double undef to i32
+  ; CHECK: cost of 2 {{.*}} fptosi
+  %r47 = fptosi double undef to i32
+  ; Function call; expected cost 10, the same value as float -> i64 above.
+  ; CHECK: cost of 10 {{.*}} fptoui
+  %r48 = fptoui double undef to i64
+  ; CHECK: cost of 10 {{.*}} fptosi
+  %r49 = fptosi double undef to i64
+  ; Integer to floating point: sources up to i32 are expected to cost 2.
+  ; CHECK: cost of 2 {{.*}} sitofp
+  %r50 = sitofp i1 undef to float
+  ; CHECK: cost of 2 {{.*}} uitofp
+  %r51 = uitofp i1 undef to float
+  ; CHECK: cost of 2 {{.*}} sitofp
+  %r52 = sitofp i1 undef to double
+  ; CHECK: cost of 2 {{.*}} uitofp
+  %r53 = uitofp i1 undef to double
+  ; CHECK: cost of 2 {{.*}} sitofp
+  %r54 = sitofp i8 undef to float
+  ; CHECK: cost of 2 {{.*}} uitofp
+  %r55 = uitofp i8 undef to float
+  ; CHECK: cost of 2 {{.*}} sitofp
+  %r56 = sitofp i8 undef to double
+  ; CHECK: cost of 2 {{.*}} uitofp
+  %r57 = uitofp i8 undef to double
+  ; CHECK: cost of 2 {{.*}} sitofp
+  %r58 = sitofp i16 undef to float
+  ; CHECK: cost of 2 {{.*}} uitofp
+  %r59 = uitofp i16 undef to float
+  ; CHECK: cost of 2 {{.*}} sitofp
+  %r60 = sitofp i16 undef to double
+  ; CHECK: cost of 2 {{.*}} uitofp
+  %r61 = uitofp i16 undef to double
+  ; CHECK: cost of 2 {{.*}} sitofp
+  %r62 = sitofp i32 undef to float
+  ; CHECK: cost of 2 {{.*}} uitofp
+  %r63 = uitofp i32 undef to float
+  ; CHECK: cost of 2 {{.*}} sitofp
+  %r64 = sitofp i32 undef to double
+  ; CHECK: cost of 2 {{.*}} uitofp
+  %r65 = uitofp i32 undef to double
+  ; Function call: i64 sources are expected to cost 10.
+  ; CHECK: cost of 10 {{.*}} sitofp
+  %r66 = sitofp i64 undef to float
+  ; CHECK: cost of 10 {{.*}} uitofp
+  %r67 = uitofp i64 undef to float
+  ; CHECK: cost of 10 {{.*}} sitofp
+  %r68 = sitofp i64 undef to double
+  ; CHECK: cost of 10 {{.*}} uitofp
+  %r69 = uitofp i64 undef to double
+
+  ;CHECK: cost of 0 {{.*}} ret
+  ret i32 undef
+}
+
diff --git a/test/Analysis/CostModel/ARM/gep.ll b/test/Analysis/CostModel/ARM/gep.ll
new file mode 100644
index 0000000000..a63b87d2ad
--- /dev/null
+++ b/test/Analysis/CostModel/ARM/gep.ll
@@ -0,0 +1,43 @@
+; RUN: opt -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios6.0.0"
+
+define void @test_geps() {
+  ; Scalar integer pointees (i8..i64): a gep should cost one, because we can't
+  ; always expect it to be folded into the instruction addressing mode.
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i8*
+  %a0 = getelementptr inbounds i8* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i16*
+  %a1 = getelementptr inbounds i16* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i32*
+  %a2 = getelementptr inbounds i32* undef, i32 0
+
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds i64*
+  %a3 = getelementptr inbounds i64* undef, i32 0
+
+  ; Scalar floating point pointees: a gep should also cost one. We cannot fold
+  ; the address computation.
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds float*
+  %a4 = getelementptr inbounds float* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds double*
+  %a5 = getelementptr inbounds double* undef, i32 0
+
+
+  ; Vector pointees (<4 x T>): a gep should cost one. We cannot fold the address computation.
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i8>*
+  %a7 = getelementptr inbounds <4 x i8>* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i16>*
+  %a8 = getelementptr inbounds <4 x i16>* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i32>*
+  %a9 = getelementptr inbounds <4 x i32>* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x i64>*
+  %a10 = getelementptr inbounds <4 x i64>* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x float>*
+  %a11 = getelementptr inbounds <4 x float>* undef, i32 0
+;CHECK: cost of 1 for instruction: {{.*}} getelementptr inbounds <4 x double>*
+  %a12 = getelementptr inbounds <4 x double>* undef, i32 0
+
+
+  ret void
+}
diff --git a/test/Analysis/CostModel/ARM/insertelement.ll b/test/Analysis/CostModel/ARM/insertelement.ll
new file mode 100644
index 0000000000..f951b08f9b
--- /dev/null
+++ b/test/Analysis/CostModel/ARM/insertelement.ll
@@ -0,0 +1,46 @@
+; RUN: opt -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios6.0.0"
+
+; Multiple insert elements from loads into d subregisters are expensive on swift
+; due to renaming constraints, so each insertelement below is expected to cost 3.
+%T_i8v = type <8 x i8>
+%T_i8 = type i8
+; CHECK: insertelement_i8
+define void @insertelement_i8(%T_i8* %saddr,
+                           %T_i8v* %vaddr) {
+  %v0 = load %T_i8v* %vaddr
+  %v1 = load %T_i8* %saddr
+;CHECK: estimated cost of 3 for {{.*}} insertelement <8 x i8>
+  %v2 = insertelement %T_i8v %v0, %T_i8 %v1, i32 1
+  store %T_i8v %v2, %T_i8v* %vaddr
+  ret void
+}
+
+; <4 x i16> variant: inserting a loaded i16 into lane 1 is expected to cost 3.
+%T_i16v = type <4 x i16>
+%T_i16 = type i16
+; CHECK: insertelement_i16
+define void @insertelement_i16(%T_i16* %saddr,
+                           %T_i16v* %vaddr) {
+  %v0 = load %T_i16v* %vaddr
+  %v1 = load %T_i16* %saddr
+;CHECK: estimated cost of 3 for {{.*}} insertelement <4 x i16>
+  %v2 = insertelement %T_i16v %v0, %T_i16 %v1, i32 1
+  store %T_i16v %v2, %T_i16v* %vaddr
+  ret void
+}
+; <2 x i32> variant: inserting a loaded i32 into lane 1 is expected to cost 3.
+%T_i32v = type <2 x i32>
+%T_i32 = type i32
+; CHECK: insertelement_i32
+define void @insertelement_i32(%T_i32* %saddr,
+                           %T_i32v* %vaddr) {
+  %v0 = load %T_i32v* %vaddr
+  %v1 = load %T_i32* %saddr
+;CHECK: estimated cost of 3 for {{.*}} insertelement <2 x i32>
+  %v2 = insertelement %T_i32v %v0, %T_i32 %v1, i32 1
+  store %T_i32v %v2, %T_i32v* %vaddr
+  ret void
+}
diff --git a/test/Analysis/CostModel/ARM/lit.local.cfg b/test/Analysis/CostModel/ARM/lit.local.cfg
new file mode 100644
index 0000000000..cb77b09ef4
--- /dev/null
+++ b/test/Analysis/CostModel/ARM/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'ARM' in targets:
+    config.unsupported = True
+
diff --git a/test/Analysis/CostModel/ARM/select.ll b/test/Analysis/CostModel/ARM/select.ll
new file mode 100644
index 0000000000..96afccfc8c
--- /dev/null
+++ b/test/Analysis/CostModel/ARM/select.ll
@@ -0,0 +1,54 @@
+; RUN: opt < %s  -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios6.0.0"
+; Select cost expectations. NOTE(review): the name @casts looks copied from cast.ll - confirm.
+; CHECK: casts
+define void @casts() {
+  ; Scalar values: i64 is expected to cost 2, every other scalar type 1.
+  ; CHECK: cost of 1 {{.*}} select
+  %v1 = select i1 undef, i8 undef, i8 undef
+  ; CHECK: cost of 1 {{.*}} select
+  %v2 = select i1 undef, i16 undef, i16 undef
+  ; CHECK: cost of 1 {{.*}} select
+  %v3 = select i1 undef, i32 undef, i32 undef
+  ; CHECK: cost of 2 {{.*}} select
+  %v4 = select i1 undef, i64 undef, i64 undef
+  ; CHECK: cost of 1 {{.*}} select
+  %v5 = select i1 undef, float undef, float undef
+  ; CHECK: cost of 1 {{.*}} select
+  %v6 = select i1 undef, double undef, double undef
+
+  ; Vector values: every vector select below is expected to cost 1.
+  ; CHECK: cost of 1 {{.*}} select
+  %v7 = select <2 x i1> undef, <2 x i8> undef, <2 x i8> undef
+  ; CHECK: cost of 1 {{.*}} select
+  %v8 = select <4 x i1>  undef, <4 x i8> undef, <4 x i8> undef
+  ; CHECK: cost of 1 {{.*}} select
+  %v9 = select <8 x i1>  undef, <8 x i8> undef, <8 x i8> undef
+  ; CHECK: cost of 1 {{.*}} select
+  %v10 = select <16 x i1>  undef, <16 x i8> undef, <16 x i8> undef
+
+  ; CHECK: cost of 1 {{.*}} select
+  %v11 = select <2 x i1> undef, <2 x i16> undef, <2 x i16> undef
+  ; CHECK: cost of 1 {{.*}} select
+  %v12 = select <4 x i1>  undef, <4 x i16> undef, <4 x i16> undef
+  ; CHECK: cost of 1 {{.*}} select
+  %v13 = select <8 x i1>  undef, <8 x i16> undef, <8 x i16> undef
+
+  ; CHECK: cost of 1 {{.*}} select
+  %v14 = select <2 x i1> undef, <2 x i32> undef, <2 x i32> undef
+  ; CHECK: cost of 1 {{.*}} select
+  %v15 = select <4 x i1>  undef, <4 x i32> undef, <4 x i32> undef
+  ; CHECK: cost of 1 {{.*}} select
+  %v16 = select <2 x i1> undef, <2 x i64> undef, <2 x i64> undef
+
+  ; CHECK: cost of 1 {{.*}} select
+  %v17 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef
+  ; CHECK: cost of 1 {{.*}} select
+  %v18 = select <4 x i1>  undef, <4 x float> undef, <4 x float> undef
+
+  ; CHECK: cost of 1 {{.*}} select
+  %v19 = select <2 x i1>  undef, <2 x double> undef, <2 x double> undef
+
+  ret void
+}
diff --git a/test/Analysis/CostModel/ARM/shuffle.ll b/test/Analysis/CostModel/ARM/shuffle.ll
new file mode 100644
index 0000000000..c92d668804
--- /dev/null
+++ b/test/Analysis/CostModel/ARM/shuffle.ll
@@ -0,0 +1,40 @@
+; RUN: opt < %s  -cost-model -analyze -mtriple=thumbv7-apple-ios6.0.0 -mcpu=swift | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios6.0.0"
+
+; CHECK: shuffle
+define void @shuffle() {
+
+  ;; Every mask below reverses the element order of the first operand.
+  ;; Reverse shuffles should be lowered to vrev and possibly a vext (for
+  ;; quadwords)
+
+  ; Expected cost: 1 for vectors of up to 64 bits, 2 for 128-bit vectors.
+  ; CHECK: cost of 1 {{.*}} shuffle
+  %v7 = shufflevector <2 x i8> undef, <2 x i8>undef, <2 x i32> <i32 1, i32 0>
+  ; CHECK: cost of 1 {{.*}} shuffle
+  %v8 = shufflevector <4 x i8> undef, <4 x i8>undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ; CHECK: cost of 1 {{.*}} shuffle
+  %v9 = shufflevector <8 x i8> undef, <8 x i8>undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  ; CHECK: cost of 2 {{.*}} shuffle
+  %v10 = shufflevector <16 x i8> undef, <16 x i8>undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+  ; CHECK: cost of 1 {{.*}} shuffle
+  %v11 = shufflevector <2 x i16> undef, <2 x i16>undef, <2 x i32> <i32 1, i32 0>
+  ; CHECK: cost of 1 {{.*}} shuffle
+  %v12 = shufflevector <4 x i16> undef, <4 x i16>undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ; CHECK: cost of 2 {{.*}} shuffle
+  %v13 = shufflevector <8 x i16> undef, <8 x i16>undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+
+  ; CHECK: cost of 1 {{.*}} shuffle
+  %v14 = shufflevector <2 x i32> undef, <2 x i32>undef, <2 x i32> <i32 1, i32 0>
+  ; CHECK: cost of 2 {{.*}} shuffle
+  %v15 = shufflevector <4 x i32> undef, <4 x i32>undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+
+  ; CHECK: cost of 1 {{.*}} shuffle
+  %v16 = shufflevector <2 x float> undef, <2 x float>undef, <2 x i32> <i32 1, i32 0>
+  ; CHECK: cost of 2 {{.*}} shuffle
+  %v17 = shufflevector <4 x float> undef, <4 x float>undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+
+  ret void
+}