36 files changed, 2724 insertions, 6 deletions
diff --git a/test/CodeGen/Hexagon/absaddr-store.ll b/test/CodeGen/Hexagon/absaddr-store.ll
new file mode 100644
index 0000000000..5c2554df8a
--- /dev/null
+++ b/test/CodeGen/Hexagon/absaddr-store.ll
@@ -0,0 +1,46 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that we generate load instructions with absolute addressing mode.
+
+@a = external global i32
+@b = external global i8
+@c = external global i16
+@d = external global i64
+
+define zeroext i8 @absStoreByte() nounwind {
+; CHECK: memb(##b){{ *}}={{ *}}r{{[0-9]+}}
+entry:
+  %0 = load i8* @b, align 1
+  %conv = zext i8 %0 to i32
+  %mul = mul nsw i32 100, %conv
+  %conv1 = trunc i32 %mul to i8
+  store i8 %conv1, i8* @b, align 1
+  ret i8 %conv1
+}
+
+define signext i16 @absStoreHalf() nounwind {
+; CHECK: memh(##c){{ *}}={{ *}}r{{[0-9]+}}
+entry:
+  %0 = load i16* @c, align 2
+  %conv = sext i16 %0 to i32
+  %mul = mul nsw i32 100, %conv
+  %conv1 = trunc i32 %mul to i16
+  store i16 %conv1, i16* @c, align 2
+  ret i16 %conv1
+}
+
+define i32 @absStoreWord() nounwind {
+; CHECK: memw(##a){{ *}}={{ *}}r{{[0-9]+}}
+entry:
+  %0 = load i32* @a, align 4
+  %mul = mul nsw i32 100, %0
+  store i32 %mul, i32* @a, align 4
+  ret i32 %mul
+}
+
+define void @absStoreDouble() nounwind {
+; CHECK: memd(##d){{ *}}={{ *}}r{{[0-9]+}}:{{[0-9]+}}
+entry:
+  store i64 100, i64* @d, align 8
+  ret void
+}
+
diff --git a/test/CodeGen/Hexagon/adde.ll b/test/CodeGen/Hexagon/adde.ll
new file mode 100644
index 0000000000..9cee3e215d
--- /dev/null
+++ b/test/CodeGen/Hexagon/adde.ll
@@ -0,0 +1,34 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; CHECK: r{{[0-9]+:[0-9]+}} = #0
+; CHECK: r{{[0-9]+:[0-9]+}} = #1
+; CHECK: r{{[0-9]+:[0-9]+}} = add(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}})
+; CHECK: p{{[0-9]+}} = cmp.gtu(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}})
+; CHECK: p{{[0-9]+}} = cmp.gtu(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}})
+; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}})
+; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}})
+; CHECK: r{{[0-9]+:[0-9]+}} = combine(r{{[0-9]+}}, r{{[0-9]+}})
+; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}})
+; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}})
+; CHECK: r{{[0-9]+:[0-9]+}} = combine(r{{[0-9]+}}, r{{[0-9]+}})
+; CHECK: r{{[0-9]+:[0-9]+}} = add(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}})
+
+
+define void @check_adde_addc (i64 %AL, i64 %AH, i64 %BL, i64 %BH, i64* %RL, i64* %RH) {
+entry:
+        %tmp1 = zext i64 %AL to i128
+        %tmp23 = zext i64 %AH to i128
+        %tmp4 = shl i128 %tmp23, 64
+        %tmp5 = or i128 %tmp4, %tmp1
+        %tmp67 = zext i64 %BL to i128
+        %tmp89 = zext i64 %BH to i128
+        %tmp11 = shl i128 %tmp89, 64
+        %tmp12 = or i128 %tmp11, %tmp67
+        %tmp15 = add i128 %tmp12, %tmp5
+        %tmp1617 = trunc i128 %tmp15 to i64
+        store i64 %tmp1617, i64* %RL
+        %tmp21 = lshr i128 %tmp15, 64
+        %tmp2122 = trunc i128 %tmp21 to i64
+        store i64 %tmp2122, i64* %RH
+        ret void
+}
+\ No newline at end of file
diff --git a/test/CodeGen/Hexagon/args.ll b/test/CodeGen/Hexagon/args.ll
index 767a442612..f8c9e44c83 100644
--- a/test/CodeGen/Hexagon/args.ll
+++ b/test/CodeGen/Hexagon/args.ll
@@ -1,11 +1,11 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-hexagon-misched < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-dfa-sched -disable-hexagon-misched < %s | FileCheck %s
 ; CHECK: memw(r29{{ *}}+{{ *}}#0){{ *}}={{ *}}#7
-; CHECK: r5 = #6
 ; CHECK: r0 = #1
 ; CHECK: r1 = #2
 ; CHECK: r2 = #3
 ; CHECK: r3 = #4
 ; CHECK: r4 = #5
+; CHECK: r5 = #6
 
 
 define void @foo() nounwind {
diff --git a/test/CodeGen/Hexagon/block-addr.ll b/test/CodeGen/Hexagon/block-addr.ll
new file mode 100644
index 0000000000..54a12bf484
--- /dev/null
+++ b/test/CodeGen/Hexagon/block-addr.ll
@@ -0,0 +1,64 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; CHECK: r{{[0-9]+}} = CONST32(#.LJTI{{[0-9]+_[0-9]+}})
+; CHECK: r{{[0-9]+}} = memw(r{{[0-9]+}}+r{{[0-9]+<<#[0-9]+}})
+; CHECK: jumpr r{{[0-9]+}}
+
+define void @main() #0 {
+entry:
+  %ret = alloca i32, align 4
+  br label %while.body
+
+while.body:
+  %ret.0.load17 = load volatile i32* %ret, align 4
+  switch i32 %ret.0.load17, label %label6 [
+    i32 0, label %label0
+    i32 1, label %label1
+    i32 2, label %label2
+    i32 3, label %label3
+    i32 4, label %label4
+    i32 5, label %label5
+  ]
+
+label0:
+  %ret.0.load18 = load volatile i32* %ret, align 4
+  %inc = add nsw i32 %ret.0.load18, 1
+  store volatile i32 %inc, i32* %ret, align 4
+  br label %while.body
+
+label1:
+  %ret.0.load19 = load volatile i32* %ret, align 4
+  %inc2 = add nsw i32 %ret.0.load19, 1
+  store volatile i32 %inc2, i32* %ret, align 4
+  br label %while.body
+
+label2:
+  %ret.0.load20 = load volatile i32* %ret, align 4
+  %inc4 = add nsw i32 %ret.0.load20, 1
+  store volatile i32 %inc4, i32* %ret, align 4
+  br label %while.body
+
+label3:
+  %ret.0.load21 = load volatile i32* %ret, align 4
+  %inc6 = add nsw i32 %ret.0.load21, 1
+  store volatile i32 %inc6, i32* %ret, align 4
+  br label %while.body
+
+label4:
+  %ret.0.load22 = load volatile i32* %ret, align 4
+  %inc8 = add nsw i32 %ret.0.load22, 1
+  store volatile i32 %inc8, i32* %ret, align 4
+  br label %while.body
+
+label5:
+  %ret.0.load23 = load volatile i32* %ret, align 4
+  %inc10 = add nsw i32 %ret.0.load23, 1
+  store volatile i32 %inc10, i32* %ret, align 4
+  br label %while.body
+
+label6:
+  store volatile i32 0, i32* %ret, align 4
+  br label %while.body
+}
+
+attributes #0 = { noreturn nounwind "target-cpu"="hexagonv4" }
diff --git a/test/CodeGen/Hexagon/cext-check.ll b/test/CodeGen/Hexagon/cext-check.ll
new file mode 100644
index 0000000000..7c4b19e5a4
--- /dev/null
+++ b/test/CodeGen/Hexagon/cext-check.ll
@@ -0,0 +1,57 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that we constant extended instructions only when necessary.
+
+define i32 @cext_test1(i32* %a) nounwind {
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memw(r{{[0-9]+}}+##8000)
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}add(r{{[0-9]+}}{{ *}},{{ *}}##300000)
+; CHECK-NOT: r{{[0-9]+}}{{ *}}={{ *}}memw(r{{[0-9]+}}+##4092)
+; CHECK-NOT: r{{[0-9]+}}{{ *}}={{ *}}add(r{{[0-9]+}}{{ *}},{{ *}}##300)
+entry:
+  %0 = load i32* %a, align 4
+  %tobool = icmp ne i32 %0, 0
+  br i1 %tobool, label %if.then, label %if.end
+
+if.then:
+  %arrayidx1 = getelementptr inbounds i32* %a, i32 2000
+  %1 = load i32* %arrayidx1, align 4
+  %add = add nsw i32 %1, 300000
+  br label %return
+
+if.end:
+  %arrayidx2 = getelementptr inbounds i32* %a, i32 1023
+  %2 = load i32* %arrayidx2, align 4
+  %add3 = add nsw i32 %2, 300
+  br label %return
+
+return:
+  %retval.0 = phi i32 [ %add, %if.then ], [ %add3, %if.end ]
+  ret i32 %retval.0
+}
+
+define i32 @cext_test2(i8* %a) nounwind {
+; CHECK-NOT: r{{[0-9]+}}{{ *}}={{ *}}memub(r{{[0-9]+}}+##1023)
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}add(r{{[0-9]+}}{{ *}},{{ *}}##300000)
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memub(r{{[0-9]+}}+##1024)
+; CHECK-NOT: r{{[0-9]+}}{{ *}}={{ *}}add(r{{[0-9]+}}{{ *}},{{ *}}##6000)
+entry:
+  %tobool = icmp ne i8* %a, null
+  br i1 %tobool, label %if.then, label %if.end
+
+if.then:
+  %arrayidx = getelementptr inbounds i8* %a, i32 1023
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 300000
+  br label %return
+
+if.end:
+  %arrayidx1 = getelementptr inbounds i8* %a, i32 1024
+  %1 = load i8* %arrayidx1, align 1
+  %conv2 = zext i8 %1 to i32
+  %add3 = add nsw i32 %conv2, 6000
+  br label %return
+
+return:
+  %retval.0 = phi i32 [ %add, %if.then ], [ %add3, %if.end ]
+  ret i32 %retval.0
+}
diff --git a/test/CodeGen/Hexagon/cext-valid-packet1.ll b/test/CodeGen/Hexagon/cext-valid-packet1.ll
new file mode 100644
index 0000000000..a479d37e4a
--- /dev/null
+++ b/test/CodeGen/Hexagon/cext-valid-packet1.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+
+; Check that the packetizer generates valid packets with constant
+; extended instructions.
+; CHECK: {
+; CHECK-NEXT: r{{[0-9]+}}{{ *}}={{ *}}add(r{{[0-9]+}}, ##{{[0-9]+}})
+; CHECK-NEXT: r{{[0-9]+}}{{ *}}={{ *}}add(r{{[0-9]+}}, ##{{[0-9]+}})
+; CHECK-NEXT: }
+
+define i32 @check-packet1(i32 %a, i32 %b, i32 %c) nounwind readnone {
+entry:
+  %add = add nsw i32 %a, 200000
+  %add1 = add nsw i32 %b, 200001
+  %add2 = add nsw i32 %c, 200002
+  %cmp = icmp sgt i32 %add, %add1
+  %b.addr.0 = select i1 %cmp, i32 %add1, i32 %add2
+  ret i32 %b.addr.0
+}
diff --git a/test/CodeGen/Hexagon/cext-valid-packet2.ll b/test/CodeGen/Hexagon/cext-valid-packet2.ll
new file mode 100644
index 0000000000..2788a6b1c8
--- /dev/null
+++ b/test/CodeGen/Hexagon/cext-valid-packet2.ll
@@ -0,0 +1,43 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that the packetizer generates valid packets with constant
+; extended add and base+offset store instructions.
+
+; CHECK: {
+; CHECK-NEXT: r{{[0-9]+}}{{ *}}={{ *}}add(r{{[0-9]+}}, ##{{[0-9]+}})
+; CHECK-NEXT: memw(r{{[0-9]+}}+{{ *}}##{{[0-9]+}}){{ *}}={{ *}}r{{[0-9]+}}.new
+; CHECK-NEXT: }
+
+define i32 @test(i32* nocapture %a, i32* nocapture %b, i32 %c) nounwind {
+entry:
+  %add = add nsw i32 %c, 200002
+  %0 = load i32* %a, align 4
+  %add1 = add nsw i32 %0, 200000
+  %arrayidx2 = getelementptr inbounds i32* %a, i32 3000
+  store i32 %add1, i32* %arrayidx2, align 4
+  %1 = load i32* %b, align 4
+  %add4 = add nsw i32 %1, 200001
+  %arrayidx5 = getelementptr inbounds i32* %a, i32 1
+  store i32 %add4, i32* %arrayidx5, align 4
+  %arrayidx7 = getelementptr inbounds i32* %b, i32 1
+  %2 = load i32* %arrayidx7, align 4
+  %cmp = icmp sgt i32 %add4, %2
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %arrayidx8 = getelementptr inbounds i32* %a, i32 2
+  %3 = load i32* %arrayidx8, align 4
+  %arrayidx9 = getelementptr inbounds i32* %b, i32 2000
+  %4 = load i32* %arrayidx9, align 4
+  %sub = sub nsw i32 %3, %4
+  %arrayidx10 = getelementptr inbounds i32* %a, i32 4000
+  store i32 %sub, i32* %arrayidx10, align 4
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %arrayidx11 = getelementptr inbounds i32* %b, i32 3200
+  store i32 %add, i32* %arrayidx11, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  ret i32 %add
+}
diff --git a/test/CodeGen/Hexagon/cmp-to-genreg.ll b/test/CodeGen/Hexagon/cmp-to-genreg.ll
new file mode 100644
index 0000000000..97cf51ce1a
--- /dev/null
+++ b/test/CodeGen/Hexagon/cmp-to-genreg.ll
@@ -0,0 +1,34 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that we generate compare to general register.
+
+define i32 @compare1(i32 %a) nounwind {
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}cmp.eq(r{{[0-9]+}},{{ *}}#120)
+entry:
+  %cmp = icmp eq i32 %a, 120
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @compare2(i32 %a) nounwind readnone {
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}!cmp.eq(r{{[0-9]+}},{{ *}}#120)
+entry:
+  %cmp = icmp ne i32 %a, 120
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @compare3(i32 %a, i32 %b) nounwind readnone {
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}cmp.eq(r{{[0-9]+}},{{ *}}r{{[0-9]+}})
+entry:
+  %cmp = icmp eq i32 %a, %b
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @compare4(i32 %a, i32 %b) nounwind readnone {
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}!cmp.eq(r{{[0-9]+}},{{ *}}r{{[0-9]+}})
+entry:
+  %cmp = icmp ne i32 %a, %b
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
diff --git a/test/CodeGen/Hexagon/cmp-to-predreg.ll b/test/CodeGen/Hexagon/cmp-to-predreg.ll
new file mode 100644
index 0000000000..d430b90186
--- /dev/null
+++ b/test/CodeGen/Hexagon/cmp-to-predreg.ll
@@ -0,0 +1,43 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that we generate compare to predicate register.
+
+define i32 @compare1(i32 %a, i32 %b) nounwind {
+; CHECK: p{{[0-3]}}{{ *}}={{ *}}!cmp.eq(r{{[0-9]+}},{{ *}}r{{[0-9]+}})
+entry:
+  %cmp = icmp ne i32 %a, %b
+  %add = add nsw i32 %a, %b
+  %sub = sub nsw i32 %a, %b
+  %add.sub = select i1 %cmp, i32 %add, i32 %sub
+  ret i32 %add.sub
+}
+
+define i32 @compare2(i32 %a) nounwind {
+; CHECK: p{{[0-3]}}{{ *}}={{ *}}!cmp.eq(r{{[0-9]+}},{{ *}}#10)
+entry:
+  %cmp = icmp ne i32 %a, 10
+  %add = add nsw i32 %a, 10
+  %sub = sub nsw i32 %a, 10
+  %add.sub = select i1 %cmp, i32 %add, i32 %sub
+  ret i32 %add.sub
+}
+
+define i32 @compare3(i32 %a, i32 %b) nounwind {
+; CHECK: p{{[0-3]}}{{ *}}={{ *}}cmp.gt(r{{[0-9]+}},{{ *}}r{{[0-9]+}})
+entry:
+  %cmp = icmp sgt i32 %a, %b
+  %sub = sub nsw i32 %a, %b
+  %add = add nsw i32 %a, %b
+  %sub.add = select i1 %cmp, i32 %sub, i32 %add
+  ret i32 %sub.add
+}
+
+define i32 @compare4(i32 %a) nounwind {
+; CHECK: p{{[0-3]}}{{ *}}={{ *}}cmp.gt(r{{[0-9]+}},{{ *}}#10)
+entry:
+  %cmp = icmp sgt i32 %a, 10
+  %sub = sub nsw i32 %a, 10
+  %add = add nsw i32 %a, 10
+  %sub.add = select i1 %cmp, i32 %sub, i32 %add
+  ret i32 %sub.add
+}
+
diff --git a/test/CodeGen/Hexagon/cmp_pred.ll b/test/CodeGen/Hexagon/cmp_pred.ll
new file mode 100644
index 0000000000..37db3b499f
--- /dev/null
+++ b/test/CodeGen/Hexagon/cmp_pred.ll
@@ -0,0 +1,115 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5  < %s | FileCheck %s
+; Generate various cmpb instruction followed by if (p0) .. if (!p0)...
+target triple = "hexagon"
+
+define i32 @Func_3Ugt(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp ugt i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3Uge(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp uge i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3Ult(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp ult i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3Ule(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp ule i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3Ueq(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp eq i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3Une(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp ne i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3UneC(i32 %Enum_Par_Val) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp ne i32 %Enum_Par_Val, 122
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3gt(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK: mux
+  %cmp = icmp sgt i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3ge(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp sge i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3lt(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp slt i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3le(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp sle i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3eq(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp eq i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3ne(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp ne i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3neC(i32 %Enum_Par_Val) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp ne i32 %Enum_Par_Val, 122
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
diff --git a/test/CodeGen/Hexagon/cmp_pred_reg.ll b/test/CodeGen/Hexagon/cmp_pred_reg.ll
new file mode 100644
index 0000000000..37db3b499f
--- /dev/null
+++ b/test/CodeGen/Hexagon/cmp_pred_reg.ll
@@ -0,0 +1,115 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5  < %s | FileCheck %s
+; Generate various cmpb instruction followed by if (p0) .. if (!p0)...
+target triple = "hexagon"
+
+define i32 @Func_3Ugt(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp ugt i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3Uge(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp uge i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3Ult(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp ult i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3Ule(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp ule i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3Ueq(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp eq i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3Une(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp ne i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3UneC(i32 %Enum_Par_Val) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp ne i32 %Enum_Par_Val, 122
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3gt(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK: mux
+  %cmp = icmp sgt i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3ge(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp sge i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3lt(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp slt i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3le(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp sle i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3eq(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp eq i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3ne(i32 %Enum_Par_Val, i32 %pv2) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp ne i32 %Enum_Par_Val, %pv2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3neC(i32 %Enum_Par_Val) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %cmp = icmp ne i32 %Enum_Par_Val, 122
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
diff --git a/test/CodeGen/Hexagon/cmpb_pred.ll b/test/CodeGen/Hexagon/cmpb_pred.ll
new file mode 100644
index 0000000000..1e6144701f
--- /dev/null
+++ b/test/CodeGen/Hexagon/cmpb_pred.ll
@@ -0,0 +1,92 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv5  < %s | FileCheck %s
+; Generate various cmpb instruction followed by if (p0) .. if (!p0)...
+target triple = "hexagon"
+
+@Enum_global = external global i8
+
+define i32 @Func_3(i32) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %conv = and i32 %0, 255
+  %cmp = icmp eq i32 %conv, 2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3b(i32) nounwind readonly {
+entry:
+; CHECK-NOT: mux
+  %1 = load i8* @Enum_global, align 1, !tbaa !0
+  %2 = trunc i32 %0 to i8
+  %cmp = icmp ne i8 %1, %2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3c(i32) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %conv = and i32 %0, 255
+  %cmp = icmp eq i32 %conv, 2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3d(i32) nounwind readonly {
+entry:
+; CHECK-NOT: mux
+  %1 = load i8* @Enum_global, align 1, !tbaa !0
+  %2 = trunc i32 %0 to i8
+  %cmp = icmp eq i8 %1, %2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3e(i32) nounwind readonly {
+entry:
+; CHECK-NOT: mux
+  %1 = load i8* @Enum_global, align 1, !tbaa !0
+  %2 = trunc i32 %0 to i8
+  %cmp = icmp eq i8 %1, %2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3f(i32) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %conv = and i32 %0, 255
+  %cmp = icmp ugt i32 %conv, 2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3g(i32) nounwind readnone {
+entry:
+; CHECK: mux
+  %conv = and i32 %0, 255
+  %cmp = icmp ult i32 %conv, 3
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3h(i32) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %conv = and i32 %0, 254
+  %cmp = icmp ult i32 %conv, 2
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+define i32 @Func_3i(i32) nounwind readnone {
+entry:
+; CHECK-NOT: mux
+  %conv = and i32 %0, 254
+  %cmp = icmp ugt i32 %conv, 1
+  %selv = zext i1 %cmp to i32
+  ret i32 %selv
+}
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/Hexagon/combine_ir.ll b/test/CodeGen/Hexagon/combine_ir.ll
new file mode 100644
index 0000000000..921ce9928e
--- /dev/null
+++ b/test/CodeGen/Hexagon/combine_ir.ll
@@ -0,0 +1,55 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; CHECK: word
+; CHECK: combine(#0
+
+define void @word(i32* nocapture %a) nounwind {
+entry:
+  %0 = load i32* %a, align 4, !tbaa !0
+  %1 = zext i32 %0 to i64
+  %add.ptr = getelementptr inbounds i32* %a, i32 1
+  %2 = load i32* %add.ptr, align 4, !tbaa !0
+  %3 = zext i32 %2 to i64
+  %4 = shl nuw i64 %3, 32
+  %ins = or i64 %4, %1
+  tail call void @bar(i64 %ins) nounwind
+  ret void
+}
+
+declare void @bar(i64)
+
+; CHECK: halfword
+; CHECK: combine(#0
+
+define void @halfword(i16* nocapture %a) nounwind {
+entry:
+  %0 = load i16* %a, align 2, !tbaa !3
+  %1 = zext i16 %0 to i64
+  %add.ptr = getelementptr inbounds i16* %a, i32 1
+  %2 = load i16* %add.ptr, align 2, !tbaa !3
+  %3 = zext i16 %2 to i64
+  %4 = shl nuw nsw i64 %3, 16
+  %ins = or i64 %4, %1
+  tail call void @bar(i64 %ins) nounwind
+  ret void
+}
+
+; CHECK: byte
+; CHECK: combine(#0
+
+define void @byte(i8* nocapture %a) nounwind {
+entry:
+  %0 = load i8* %a, align 1, !tbaa !1
+  %1 = zext i8 %0 to i64
+  %add.ptr = getelementptr inbounds i8* %a, i32 1
+  %2 = load i8* %add.ptr, align 1, !tbaa !1
+  %3 = zext i8 %2 to i64
+  %4 = shl nuw nsw i64 %3, 8
+  %ins = or i64 %4, %1
+  tail call void @bar(i64 %ins) nounwind
+  ret void
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"short", metadata !1}
diff --git a/test/CodeGen/Hexagon/ctlz-cttz-ctpop.ll b/test/CodeGen/Hexagon/ctlz-cttz-ctpop.ll
new file mode 100644
index 0000000000..e942f8d0c5
--- /dev/null
+++ b/test/CodeGen/Hexagon/ctlz-cttz-ctpop.ll
@@ -0,0 +1,34 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+
+; CHECK: r{{[0-9]+}}:{{[0-9]+}} |= lsr(r{{[0-9]+}}:{{[0-9]+}}, #4)
+; CHECK: r{{[0-9]+}}:{{[0-9]+}} &= lsr(r{{[0-9]+}}:{{[0-9]+}}, #2)
+; CHECK: r{{[0-9]+}} += lsr(r{{[0-9]+}}, #4)
+
+define i32 @foo(i64 %a, i32 %b) nounwind  {
+entry:
+        %tmp0 = tail call i64 @llvm.ctlz.i64( i64 %a, i1 true )
+        %tmp1 = tail call i64 @llvm.cttz.i64( i64 %a, i1 true )
+        %tmp2 = tail call i32 @llvm.ctlz.i32( i32 %b, i1 true )
+        %tmp3 = tail call i32 @llvm.cttz.i32( i32 %b, i1 true )
+        %tmp4 = tail call i64 @llvm.ctpop.i64( i64 %a )
+        %tmp5 = tail call i32 @llvm.ctpop.i32( i32 %b )
+
+
+        %tmp6 = trunc i64 %tmp0 to i32
+        %tmp7 = trunc i64 %tmp1 to i32
+        %tmp8 = trunc i64 %tmp4 to i32
+        %tmp9 = add i32 %tmp6, %tmp7
+        %tmp10 = add i32 %tmp9, %tmp8
+        %tmp11 = add i32 %tmp10, %tmp2
+        %tmp12 = add i32 %tmp11, %tmp3
+        %tmp13 = add i32 %tmp12, %tmp5
+
+        ret i32 %tmp13
+}
+
+declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
+declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
+declare i64 @llvm.cttz.i64(i64, i1) nounwind readnone
+declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
+declare i64 @llvm.ctpop.i64(i64) nounwind readnone
+declare i32 @llvm.ctpop.i32(i32) nounwind readnone
diff --git a/test/CodeGen/Hexagon/dualstore.ll b/test/CodeGen/Hexagon/dualstore.ll
index 067499530f..f7d7e8bbe7 100644
--- a/test/CodeGen/Hexagon/dualstore.ll
+++ b/test/CodeGen/Hexagon/dualstore.ll
@@ -1,8 +1,8 @@
-; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-hexagon-misched < %s | FileCheck %s
 ; Check that we generate dual stores in one packet in V4
 
-; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#{{[0-9]+}}){{ *}}={{ *}}#100000
-; CHECK-NEXT: memw(r{{[0-9]+}}{{ *}}+{{ *}}#{{[0-9]+}}){{ *}}={{ *}}#500000
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#{{[0-9]+}}){{ *}}={{ *}}##500000
+; CHECK-NEXT: memw(r{{[0-9]+}}{{ *}}+{{ *}}#{{[0-9]+}}){{ *}}={{ *}}##100000
 ; CHECK-NEXT: }
 
 @Reg = global i32 0, align 4
diff --git a/test/CodeGen/Hexagon/gp-plus-offset-load.ll b/test/CodeGen/Hexagon/gp-plus-offset-load.ll
new file mode 100644
index 0000000000..a1b80a65f8
--- /dev/null
+++ b/test/CodeGen/Hexagon/gp-plus-offset-load.ll
@@ -0,0 +1,51 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that we generate load instructions with global + offset
+
+%struct.struc = type { i8, i8, i16, i32 }
+
+@foo = common global %struct.struc zeroinitializer, align 4
+
+define void @loadWord(i32 %val1, i32 %val2, i32* nocapture %ival) nounwind {
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memw(##foo{{ *}}+{{ *}}4)
+entry:
+  %cmp = icmp sgt i32 %val1, %val2
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %0 = load i32* getelementptr inbounds (%struct.struc* @foo, i32 0, i32 3), align 4
+  store i32 %0, i32* %ival, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @loadByte(i32 %val1, i32 %val2, i8* nocapture %ival) nounwind {
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memb(##foo{{ *}}+{{ *}}1)
+entry:
+  %cmp = icmp sgt i32 %val1, %val2
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %0 = load i8* getelementptr inbounds (%struct.struc* @foo, i32 0, i32 1), align 1
+  store i8 %0, i8* %ival, align 1
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @loadHWord(i32 %val1, i32 %val2, i16* %ival) nounwind {
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memh(##foo{{ *}}+{{ *}}2)
+entry:
+  %cmp = icmp sgt i32 %val1, %val2
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %0 = load i16* getelementptr inbounds (%struct.struc* @foo, i32 0, i32 2), align 2
+  store i16 %0, i16* %ival, align 2
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
diff --git a/test/CodeGen/Hexagon/gp-plus-offset-store.ll b/test/CodeGen/Hexagon/gp-plus-offset-store.ll
new file mode 100644
index 0000000000..c782b30920
--- /dev/null
+++ b/test/CodeGen/Hexagon/gp-plus-offset-store.ll
@@ -0,0 +1,35 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that we generate store instructions with global + offset
+
+%struct.struc = type { i8, i8, i16, i32 }
+
+@foo = common global %struct.struc zeroinitializer, align 4
+
+define void @storeByte(i32 %val1, i32 %val2, i8 zeroext %ival) nounwind {
+; CHECK: memb(##foo{{ *}}+{{ *}}1){{ *}}={{ *}}r{{[0-9]+}}
+entry:
+  %cmp = icmp sgt i32 %val1, %val2
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  store i8 %ival, i8* getelementptr inbounds (%struct.struc* @foo, i32 0, i32 1), align 1
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+define void @storeHW(i32 %val1, i32 %val2, i16 signext %ival) nounwind {
+; CHECK: memh(##foo{{ *}}+{{ *}}2){{ *}}={{ *}}r{{[0-9]+}}
+entry:
+  %cmp = icmp sgt i32 %val1, %val2
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  store i16 %ival, i16* getelementptr inbounds (%struct.struc* @foo, i32 0, i32 2), align 2
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
diff --git a/test/CodeGen/Hexagon/hwloop-cleanup.ll b/test/CodeGen/Hexagon/hwloop-cleanup.ll
new file mode 100644
index 0000000000..6456ebff16
--- /dev/null
+++ b/test/CodeGen/Hexagon/hwloop-cleanup.ll
@@ -0,0 +1,86 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that we remove the compare and induction variable instructions
+; after generating hardware loops.
+; Bug 6685.
+
+; CHECK: loop0
+; CHECK-NOT: r{{[0-9]+}}{{.}}={{.}}add(r{{[0-9]+}},{{.}}#-1)
+; CHECK-NOT: cmp.eq
+; CHECK: endloop0
+
+define i32 @test1(i32* nocapture %b, i32 %n) nounwind readonly {
+entry:
+  %cmp1 = icmp sgt i32 %n, 0
+  br i1 %cmp1, label %for.body.preheader, label %for.end
+
+for.body.preheader:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %sum.03 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx.phi = phi i32* [ %arrayidx.inc, %for.body ], [ %b, %for.body.preheader ]
+  %i.02 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %0 = load i32* %arrayidx.phi, align 4
+  %add = add nsw i32 %0, %sum.03
+  %inc = add nsw i32 %i.02, 1
+  %exitcond = icmp eq i32 %inc, %n
+  %arrayidx.inc = getelementptr i32* %arrayidx.phi, i32 1
+  br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.end.loopexit ]
+  ret i32 %sum.0.lcssa
+}
+
+; This test checks that that initial loop count value is removed.
+; CHECK-NOT: ={{.}}#40
+; CHECK: loop0
+; CHECK-NOT: r{{[0-9]+}}{{.}}={{.}}add(r{{[0-9]+}},{{.}}#-1)
+; CHECK-NOT: cmp.eq
+; CHECK: endloop0
+
+define i32 @test2(i32* nocapture %b) nounwind readonly {
+entry:
+  br label %for.body
+
+for.body:
+  %sum.02 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx.phi = phi i32* [ %b, %entry ], [ %arrayidx.inc, %for.body ]
+  %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %0 = load i32* %arrayidx.phi, align 4
+  %add = add nsw i32 %0, %sum.02
+  %inc = add nsw i32 %i.01, 1
+  %exitcond = icmp eq i32 %inc, 40
+  %arrayidx.inc = getelementptr i32* %arrayidx.phi, i32 1
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 %add
+}
+
+; This test checks that we don't remove the induction variable since it's used.
+; CHECK: loop0
+; CHECK: r{{[0-9]+}}{{.}}={{.}}add(r{{[0-9]+}},{{.}}#1)
+; CHECK-NOT: cmp.eq
+; CHECK: endloop0
+define i32 @test3(i32* nocapture %b) nounwind {
+entry:
+  br label %for.body
+
+for.body:
+  %arrayidx.phi = phi i32* [ %b, %entry ], [ %arrayidx.inc, %for.body ]
+  %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  store i32 %i.01, i32* %arrayidx.phi, align 4
+  %inc = add nsw i32 %i.01, 1
+  %exitcond = icmp eq i32 %inc, 40
+  %arrayidx.inc = getelementptr i32* %arrayidx.phi, i32 1
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i32 0
+}
+
+
diff --git a/test/CodeGen/Hexagon/hwloop-const.ll b/test/CodeGen/Hexagon/hwloop-const.ll
new file mode 100644
index 0000000000..a621c58c63
--- /dev/null
+++ b/test/CodeGen/Hexagon/hwloop-const.ll
@@ -0,0 +1,31 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 -O2 < %s | FileCheck %s
+; ModuleID = 'hwloop-const.c'
+target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
+target triple = "hexagon-unknown-linux-gnu"
+
+@b = common global [25000 x i32] zeroinitializer, align 8
+@a = common global [25000 x i32] zeroinitializer, align 8
+@c = common global [25000 x i32] zeroinitializer, align 8
+
+define i32 @hwloop_bug() nounwind {
+entry:
+  br label %for.body
+
+; CHECK: endloop
+for.body:                                         ; preds = %for.body, %entry
+  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds [25000 x i32]* @b, i32 0, i32 %i.02
+  store i32 %i.02, i32* %arrayidx, align 4, !tbaa !0
+  %arrayidx1 = getelementptr inbounds [25000 x i32]* @a, i32 0, i32 %i.02
+  store i32 %i.02, i32* %arrayidx1, align 4, !tbaa !0
+  %inc = add nsw i32 %i.02, 1
+  %exitcond = icmp eq i32 %inc, 25000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret i32 0
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/Hexagon/hwloop-dbg.ll b/test/CodeGen/Hexagon/hwloop-dbg.ll
new file mode 100644
index 0000000000..eaffa0797a
--- /dev/null
+++ b/test/CodeGen/Hexagon/hwloop-dbg.ll
@@ -0,0 +1,65 @@
+; RUN: llc < %s -march=hexagon -mcpu=hexagonv4 -O2 -disable-lsr | FileCheck %s
+; ModuleID = 'hwloop-dbg.o'
+target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
+target triple = "hexagon"
+
+define void @foo(i32* nocapture %a, i32* nocapture %b) nounwind {
+entry:
+  tail call void @llvm.dbg.value(metadata !{i32* %a}, i64 0, metadata !13), !dbg !17
+  tail call void @llvm.dbg.value(metadata !{i32* %b}, i64 0, metadata !14), !dbg !18
+  tail call void @llvm.dbg.value(metadata !2, i64 0, metadata !15), !dbg !19
+  br label %for.body, !dbg !19
+
+for.body:                                         ; preds = %for.body, %entry
+; CHECK:     loop0(
+; CHECK-NOT: add({{r[0-9]*}}, #
+; CHECK:     endloop0
+  %arrayidx.phi = phi i32* [ %a, %entry ], [ %arrayidx.inc, %for.body ]
+  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %b.addr.01 = phi i32* [ %b, %entry ], [ %incdec.ptr, %for.body ]
+  %incdec.ptr = getelementptr inbounds i32* %b.addr.01, i32 1, !dbg !21
+  tail call void @llvm.dbg.value(metadata !{i32* %incdec.ptr}, i64 0, metadata !14), !dbg !21
+  %0 = load i32* %b.addr.01, align 4, !dbg !21, !tbaa !23
+  store i32 %0, i32* %arrayidx.phi, align 4, !dbg !21, !tbaa !23
+  %inc = add nsw i32 %i.02, 1, !dbg !26
+  tail call void @llvm.dbg.value(metadata !{i32 %inc}, i64 0, metadata !15), !dbg !26
+  %exitcond = icmp eq i32 %inc, 10, !dbg !19
+  %arrayidx.inc = getelementptr i32* %arrayidx.phi, i32 1
+  br i1 %exitcond, label %for.end, label %for.body, !dbg !19
+
+for.end:                                          ; preds = %for.body
+  ret void, !dbg !27
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"hwloop-dbg.c", metadata !"/usr2/kparzysz/s.hex/t", metadata !"QuIC LLVM Hexagon Clang version 6.1-pre-unknown, (git://git-hexagon-aus.quicinc.com/llvm/clang-mainline.git e9382867661454cdf44addb39430741578e9765c) (llvm/llvm-mainline.git 36412bb1fcf03ed426d4437b41198bae066675ac)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [/usr2/kparzysz/s.hex/t/hwloop-dbg.c] [DW_LANG_C99]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32*)* @foo, null, null, metadata !11, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!6 = metadata !{i32 786473, metadata !"hwloop-dbg.c", metadata !"/usr2/kparzysz/s.hex/t", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{null, metadata !9, metadata !9}
+!9 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 32, align 32, offset 0] [from int]
+!10 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!11 = metadata !{metadata !12}
+!12 = metadata !{metadata !13, metadata !14, metadata !15}
+!13 = metadata !{i32 786689, metadata !5, metadata !"a", metadata !6, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 1]
+!14 = metadata !{i32 786689, metadata !5, metadata !"b", metadata !6, i32 33554433, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [b] [line 1]
+!15 = metadata !{i32 786688, metadata !16, metadata !"i", metadata !6, i32 2, metadata !10, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 2]
+!16 = metadata !{i32 786443, metadata !5, i32 1, i32 26, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] [/usr2/kparzysz/s.hex/t/hwloop-dbg.c]
+!17 = metadata !{i32 1, i32 15, metadata !5, null}
+!18 = metadata !{i32 1, i32 23, metadata !5, null}
+!19 = metadata !{i32 3, i32 8, metadata !20, null}
+!20 = metadata !{i32 786443, metadata !16, i32 3, i32 3, metadata !6, i32 1} ; [ DW_TAG_lexical_block ] [/usr2/kparzysz/s.hex/t/hwloop-dbg.c]
+!21 = metadata !{i32 4, i32 5, metadata !22, null}
+!22 = metadata !{i32 786443, metadata !20, i32 3, i32 28, metadata !6, i32 2} ; [ DW_TAG_lexical_block ] [/usr2/kparzysz/s.hex/t/hwloop-dbg.c]
+!23 = metadata !{metadata !"int", metadata !24}
+!24 = metadata !{metadata !"omnipotent char", metadata !25}
+!25 = metadata !{metadata !"Simple C/C++ TBAA"}
+!26 = metadata !{i32 3, i32 23, metadata !20, null}
+!27 = metadata !{i32 6, i32 1, metadata !16, null}
diff --git a/test/CodeGen/Hexagon/hwloop-le.ll b/test/CodeGen/Hexagon/hwloop-le.ll
new file mode 100644
index 0000000000..9c8cec7c2a
--- /dev/null
+++ b/test/CodeGen/Hexagon/hwloop-le.ll
@@ -0,0 +1,438 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 -O3 < %s | FileCheck %s
+
+
+; CHECK: test_pos1_ir_sle
+; CHECK: loop0
+; a < b
+define void @test_pos1_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 28395, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 28395, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 1
+  %cmp = icmp sle i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos2_ir_sle
+; CHECK: loop0
+; a < b
+define void @test_pos2_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 9073, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 9073, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 2
+  %cmp = icmp sle i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos4_ir_sle
+; CHECK: loop0
+; a < b
+define void @test_pos4_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 21956, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 21956, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 4
+  %cmp = icmp sle i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos8_ir_sle
+; CHECK: loop0
+; a < b
+define void @test_pos8_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 16782, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 16782, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 8
+  %cmp = icmp sle i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos16_ir_sle
+; CHECK: loop0
+; a < b
+define void @test_pos16_ir_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 19097, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 19097, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 16
+  %cmp = icmp sle i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos1_ri_sle
+; CHECK: loop0
+; a < b
+define void @test_pos1_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 %a, 14040
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 1
+  %cmp = icmp sle i32 %inc, 14040
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos2_ri_sle
+; CHECK: loop0
+; a < b
+define void @test_pos2_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 %a, 13710
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 2
+  %cmp = icmp sle i32 %inc, 13710
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos4_ri_sle
+; CHECK: loop0
+; a < b
+define void @test_pos4_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 %a, 9920
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 4
+  %cmp = icmp sle i32 %inc, 9920
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos8_ri_sle
+; CHECK: loop0
+; a < b
+define void @test_pos8_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 %a, 18924
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 8
+  %cmp = icmp sle i32 %inc, 18924
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos16_ri_sle
+; CHECK: loop0
+; a < b
+define void @test_pos16_ri_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 %a, 11812
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 16
+  %cmp = icmp sle i32 %inc, 11812
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos1_rr_sle
+; CHECK: loop0
+; a < b
+define void @test_pos1_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 %a, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 1
+  %cmp = icmp sle i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos2_rr_sle
+; CHECK: loop0
+; a < b
+define void @test_pos2_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 %a, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 2
+  %cmp = icmp sle i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos4_rr_sle
+; CHECK: loop0
+; a < b
+define void @test_pos4_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 %a, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 4
+  %cmp = icmp sle i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos8_rr_sle
+; CHECK: loop0
+; a < b
+define void @test_pos8_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 %a, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 8
+  %cmp = icmp sle i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos16_rr_sle
+; CHECK: loop0
+; a < b
+define void @test_pos16_rr_sle(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp sle i32 %a, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 16
+  %cmp = icmp sle i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
diff --git a/test/CodeGen/Hexagon/hwloop-lt.ll b/test/CodeGen/Hexagon/hwloop-lt.ll
new file mode 100644
index 0000000000..7e43733da2
--- /dev/null
+++ b/test/CodeGen/Hexagon/hwloop-lt.ll
@@ -0,0 +1,438 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 -O3 < %s | FileCheck %s
+
+
+; CHECK: test_pos1_ir_slt
+; CHECK: loop0
+; a < b
+define void @test_pos1_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 8531, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 8531, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 1
+  %cmp = icmp slt i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos2_ir_slt
+; CHECK: loop0
+; a < b
+define void @test_pos2_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 9152, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 9152, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 2
+  %cmp = icmp slt i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos4_ir_slt
+; CHECK: loop0
+; a < b
+define void @test_pos4_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 18851, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 18851, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 4
+  %cmp = icmp slt i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos8_ir_slt
+; CHECK: loop0
+; a < b
+define void @test_pos8_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 25466, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 25466, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 8
+  %cmp = icmp slt i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos16_ir_slt
+; CHECK: loop0
+; a < b
+define void @test_pos16_ir_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 9295, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 9295, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 16
+  %cmp = icmp slt i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos1_ri_slt
+; CHECK: loop0
+; a < b
+define void @test_pos1_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, 31236
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 1
+  %cmp = icmp slt i32 %inc, 31236
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos2_ri_slt
+; CHECK: loop0
+; a < b
+define void @test_pos2_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, 22653
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 2
+  %cmp = icmp slt i32 %inc, 22653
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos4_ri_slt
+; CHECK: loop0
+; a < b
+define void @test_pos4_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, 1431
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 4
+  %cmp = icmp slt i32 %inc, 1431
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos8_ri_slt
+; CHECK: loop0
+; a < b
+define void @test_pos8_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, 22403
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 8
+  %cmp = icmp slt i32 %inc, 22403
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos16_ri_slt
+; CHECK: loop0
+; a < b
+define void @test_pos16_ri_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, 21715
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 16
+  %cmp = icmp slt i32 %inc, 21715
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos1_rr_slt
+; CHECK: loop0
+; a < b
+define void @test_pos1_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 1
+  %cmp = icmp slt i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos2_rr_slt
+; CHECK: loop0
+; a < b
+define void @test_pos2_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 2
+  %cmp = icmp slt i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos4_rr_slt
+; CHECK: loop0
+; a < b
+define void @test_pos4_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 4
+  %cmp = icmp slt i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos8_rr_slt
+; CHECK: loop0
+; a < b
+define void @test_pos8_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 8
+  %cmp = icmp slt i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos16_rr_slt
+; CHECK: loop0
+; a < b
+define void @test_pos16_rr_slt(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 16
+  %cmp = icmp slt i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
diff --git a/test/CodeGen/Hexagon/hwloop-lt1.ll b/test/CodeGen/Hexagon/hwloop-lt1.ll
new file mode 100644
index 0000000000..cf5874011e
--- /dev/null
+++ b/test/CodeGen/Hexagon/hwloop-lt1.ll
@@ -0,0 +1,32 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that we generate a hardware loop instruction.
+; CHECK: endloop0
+
+@A = common global [400 x i8] zeroinitializer, align 8
+@B = common global [400 x i8] zeroinitializer, align 8
+@C = common global [400 x i8] zeroinitializer, align 8
+
+define void @run() nounwind {
+entry:
+  br label %polly.loop_body
+
+polly.loop_after:                                 ; preds = %polly.loop_body
+  ret void
+
+polly.loop_body:                                  ; preds = %entry, %polly.loop_body
+  %polly.loopiv16 = phi i32 [ 0, %entry ], [ %polly.next_loopiv, %polly.loop_body ]
+  %polly.next_loopiv = add i32 %polly.loopiv16, 4
+  %p_vector_iv14 = or i32 %polly.loopiv16, 1
+  %p_vector_iv3 = add i32 %p_vector_iv14, 1
+  %p_vector_iv415 = or i32 %polly.loopiv16, 3
+  %p_arrayidx = getelementptr [400 x i8]* @A, i32 0, i32 %polly.loopiv16
+  %p_arrayidx5 = getelementptr [400 x i8]* @A, i32 0, i32 %p_vector_iv14
+  %p_arrayidx6 = getelementptr [400 x i8]* @A, i32 0, i32 %p_vector_iv3
+  %p_arrayidx7 = getelementptr [400 x i8]* @A, i32 0, i32 %p_vector_iv415
+  store i8 123, i8* %p_arrayidx, align 1
+  store i8 123, i8* %p_arrayidx5, align 1
+  store i8 123, i8* %p_arrayidx6, align 1
+  store i8 123, i8* %p_arrayidx7, align 1
+  %0 = icmp slt i32 %polly.next_loopiv, 400
+  br i1 %0, label %polly.loop_body, label %polly.loop_after
+}
diff --git a/test/CodeGen/Hexagon/hwloop-ne.ll b/test/CodeGen/Hexagon/hwloop-ne.ll
new file mode 100644
index 0000000000..bceef2a169
--- /dev/null
+++ b/test/CodeGen/Hexagon/hwloop-ne.ll
@@ -0,0 +1,438 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 -O3 < %s | FileCheck %s
+
+
+; CHECK: test_pos1_ir_ne
+; CHECK: loop0
+; a < b
+define void @test_pos1_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 32623, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 32623, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 1
+  %cmp = icmp ne i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos2_ir_ne
+; CHECK: loop0
+; a < b
+define void @test_pos2_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 29554, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 29554, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 2
+  %cmp = icmp ne i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos4_ir_ne
+; CHECK: loop0
+; a < b
+define void @test_pos4_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 15692, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 15692, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 4
+  %cmp = icmp ne i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos8_ir_ne
+; CHECK: loop0
+; a < b
+define void @test_pos8_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 10449, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 10449, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 8
+  %cmp = icmp ne i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos16_ir_ne
+; CHECK: loop0
+; a < b
+define void @test_pos16_ir_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 32087, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ 32087, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 16
+  %cmp = icmp ne i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos1_ri_ne
+; CHECK: loop0
+; a < b
+define void @test_pos1_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, 3472
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 1
+  %cmp = icmp ne i32 %inc, 3472
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos2_ri_ne
+; CHECK: loop0
+; a < b
+define void @test_pos2_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, 8730
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 2
+  %cmp = icmp ne i32 %inc, 8730
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos4_ri_ne
+; CHECK: loop0
+; a < b
+define void @test_pos4_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, 1493
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 4
+  %cmp = icmp ne i32 %inc, 1493
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos8_ri_ne
+; CHECK: loop0
+; a < b
+define void @test_pos8_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, 1706
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 8
+  %cmp = icmp ne i32 %inc, 1706
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos16_ri_ne
+; CHECK: loop0
+; a < b
+define void @test_pos16_ri_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, 1886
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 16
+  %cmp = icmp ne i32 %inc, 1886
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos1_rr_ne
+; CHECK: loop0
+; a < b
+define void @test_pos1_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 1
+  %cmp = icmp ne i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos2_rr_ne
+; CHECK: loop0
+; a < b
+define void @test_pos2_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 2
+  %cmp = icmp ne i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos4_rr_ne
+; CHECK: loop0
+; a < b
+define void @test_pos4_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 4
+  %cmp = icmp ne i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos8_rr_ne
+; CHECK: loop0
+; a < b
+define void @test_pos8_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 8
+  %cmp = icmp ne i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
+; CHECK: test_pos16_rr_ne
+; CHECK: loop0
+; a < b
+define void @test_pos16_rr_ne(i8* nocapture %p, i32 %a, i32 %b) nounwind {
+entry:
+  %cmp3 = icmp slt i32 %a, %b
+  br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds i8* %p, i32 %i.04
+  %0 = load i8* %arrayidx, align 1
+  %conv = zext i8 %0 to i32
+  %add = add nsw i32 %conv, 1
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %arrayidx, align 1
+  %inc = add nsw i32 %i.04, 16
+  %cmp = icmp ne i32 %inc, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+
+
diff --git a/test/CodeGen/Hexagon/i16_VarArg.ll b/test/CodeGen/Hexagon/i16_VarArg.ll
new file mode 100644
index 0000000000..eb44c2905c
--- /dev/null
+++ b/test/CodeGen/Hexagon/i16_VarArg.ll
@@ -0,0 +1,40 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; CHECK: call __hexagon_{{[A-Z_a-z0-9]+}}
+
+@a_str = internal constant [8 x i8] c"a = %f\0A\00"
+@b_str = internal constant [8 x i8] c"b = %f\0A\00"
+@add_str = internal constant [12 x i8] c"a + b = %f\0A\00"
+@sub_str = internal constant [12 x i8] c"a - b = %f\0A\00"
+@mul_str = internal constant [12 x i8] c"a * b = %f\0A\00"
+@div_str = internal constant [12 x i8] c"b / a = %f\0A\00"
+@rem_str = internal constant [13 x i8] c"b %% a = %f\0A\00"
+@lt_str = internal constant [12 x i8] c"a < b = %d\0A\00"
+@le_str = internal constant [13 x i8] c"a <= b = %d\0A\00"
+@gt_str = internal constant [12 x i8] c"a > b = %d\0A\00"
+@ge_str = internal constant [13 x i8] c"a >= b = %d\0A\00"
+@eq_str = internal constant [13 x i8] c"a == b = %d\0A\00"
+@ne_str = internal constant [13 x i8] c"a != b = %d\0A\00"
+@A = global double 2.000000e+00
+@B = global double 5.000000e+00
+
+declare i32 @printf(i8*, ...)
+
+define i32 @main() {
+        %a = load double* @A
+        %b = load double* @B
+        %lt_r = fcmp olt double %a, %b
+        %le_r = fcmp ole double %a, %b
+        %gt_r = fcmp ogt double %a, %b
+        %ge_r = fcmp oge double %a, %b
+        %eq_r = fcmp oeq double %a, %b
+        %ne_r = fcmp une double %a, %b
+        %val1 = zext i1 %lt_r to i16
+        %lt_s = getelementptr [12 x i8]* @lt_str, i64 0, i64 0
+        %le_s = getelementptr [13 x i8]* @le_str, i64 0, i64 0
+        %gt_s = getelementptr [12 x i8]* @gt_str, i64 0, i64 0
+        %ge_s = getelementptr [13 x i8]* @ge_str, i64 0, i64 0
+        %eq_s = getelementptr [13 x i8]* @eq_str, i64 0, i64 0
+        %ne_s = getelementptr [13 x i8]* @ne_str, i64 0, i64 0
+        call i32 (i8*, ...)* @printf( i8* %lt_s, i16 %val1 )
+        ret i32 0
+}
+\ No newline at end of file
diff --git a/test/CodeGen/Hexagon/i1_VarArg.ll b/test/CodeGen/Hexagon/i1_VarArg.ll
new file mode 100644
index 0000000000..7dbfb25cd2
--- /dev/null
+++ b/test/CodeGen/Hexagon/i1_VarArg.ll
@@ -0,0 +1,44 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; CHECK: call __hexagon_{{[_A-Za-z0-9]+}}
+
+@a_str = internal constant [8 x i8] c"a = %f\0A\00"
+@b_str = internal constant [8 x i8] c"b = %f\0A\00"
+@add_str = internal constant [12 x i8] c"a + b = %f\0A\00"
+@sub_str = internal constant [12 x i8] c"a - b = %f\0A\00"
+@mul_str = internal constant [12 x i8] c"a * b = %f\0A\00"
+@div_str = internal constant [12 x i8] c"b / a = %f\0A\00"
+@rem_str = internal constant [13 x i8] c"b %% a = %f\0A\00"
+@lt_str = internal constant [12 x i8] c"a < b = %d\0A\00"
+@le_str = internal constant [13 x i8] c"a <= b = %d\0A\00"
+@gt_str = internal constant [12 x i8] c"a > b = %d\0A\00"
+@ge_str = internal constant [13 x i8] c"a >= b = %d\0A\00"
+@eq_str = internal constant [13 x i8] c"a == b = %d\0A\00"
+@ne_str = internal constant [13 x i8] c"a != b = %d\0A\00"
+@A = global double 2.000000e+00
+@B = global double 5.000000e+00
+
+declare i32 @printf(i8*, ...)
+
+define i32 @main() {
+        %a = load double* @A
+        %b = load double* @B
+        %lt_r = fcmp olt double %a, %b
+        %le_r = fcmp ole double %a, %b
+        %gt_r = fcmp ogt double %a, %b
+        %ge_r = fcmp oge double %a, %b
+        %eq_r = fcmp oeq double %a, %b
+        %ne_r = fcmp une double %a, %b
+        %lt_s = getelementptr [12 x i8]* @lt_str, i64 0, i64 0
+        %le_s = getelementptr [13 x i8]* @le_str, i64 0, i64 0
+        %gt_s = getelementptr [12 x i8]* @gt_str, i64 0, i64 0
+        %ge_s = getelementptr [13 x i8]* @ge_str, i64 0, i64 0
+        %eq_s = getelementptr [13 x i8]* @eq_str, i64 0, i64 0
+        %ne_s = getelementptr [13 x i8]* @ne_str, i64 0, i64 0
+        call i32 (i8*, ...)* @printf( i8* %lt_s, i1 %lt_r )
+        call i32 (i8*, ...)* @printf( i8* %le_s, i1 %le_r )
+        call i32 (i8*, ...)* @printf( i8* %gt_s, i1 %gt_r )
+        call i32 (i8*, ...)* @printf( i8* %ge_s, i1 %ge_r )
+        call i32 (i8*, ...)* @printf( i8* %eq_s, i1 %eq_r )
+        call i32 (i8*, ...)* @printf( i8* %ne_s, i1 %ne_r )
+        ret i32 0
+}
+\ No newline at end of file
diff --git a/test/CodeGen/Hexagon/i8_VarArg.ll b/test/CodeGen/Hexagon/i8_VarArg.ll
new file mode 100644
index 0000000000..687b178824
--- /dev/null
+++ b/test/CodeGen/Hexagon/i8_VarArg.ll
@@ -0,0 +1,40 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; CHECK: call __hexagon_{{[A-Z_a-z0-9]+}}
+
+@a_str = internal constant [8 x i8] c"a = %f\0A\00"
+@b_str = internal constant [8 x i8] c"b = %f\0A\00"
+@add_str = internal constant [12 x i8] c"a + b = %f\0A\00"
+@sub_str = internal constant [12 x i8] c"a - b = %f\0A\00"
+@mul_str = internal constant [12 x i8] c"a * b = %f\0A\00"
+@div_str = internal constant [12 x i8] c"b / a = %f\0A\00"
+@rem_str = internal constant [13 x i8] c"b %% a = %f\0A\00"
+@lt_str = internal constant [12 x i8] c"a < b = %d\0A\00"
+@le_str = internal constant [13 x i8] c"a <= b = %d\0A\00"
+@gt_str = internal constant [12 x i8] c"a > b = %d\0A\00"
+@ge_str = internal constant [13 x i8] c"a >= b = %d\0A\00"
+@eq_str = internal constant [13 x i8] c"a == b = %d\0A\00"
+@ne_str = internal constant [13 x i8] c"a != b = %d\0A\00"
+@A = global double 2.000000e+00
+@B = global double 5.000000e+00
+
+declare i32 @printf(i8*, ...)
+
+define i32 @main() {
+        %a = load double* @A
+        %b = load double* @B
+        %lt_r = fcmp olt double %a, %b
+        %le_r = fcmp ole double %a, %b
+        %gt_r = fcmp ogt double %a, %b
+        %ge_r = fcmp oge double %a, %b
+        %eq_r = fcmp oeq double %a, %b
+        %ne_r = fcmp une double %a, %b
+        %val1 = zext i1 %lt_r to i8
+        %lt_s = getelementptr [12 x i8]* @lt_str, i64 0, i64 0
+        %le_s = getelementptr [13 x i8]* @le_str, i64 0, i64 0
+        %gt_s = getelementptr [12 x i8]* @gt_str, i64 0, i64 0
+        %ge_s = getelementptr [13 x i8]* @ge_str, i64 0, i64 0
+        %eq_s = getelementptr [13 x i8]* @eq_str, i64 0, i64 0
+        %ne_s = getelementptr [13 x i8]* @ne_str, i64 0, i64 0
+        call i32 (i8*, ...)* @printf( i8* %lt_s, i8 %val1 )
+        ret i32 0
+}
+\ No newline at end of file
diff --git a/test/CodeGen/Hexagon/idxload-with-zero-offset.ll b/test/CodeGen/Hexagon/idxload-with-zero-offset.ll
new file mode 100644
index 0000000000..ca6df88a55
--- /dev/null
+++ b/test/CodeGen/Hexagon/idxload-with-zero-offset.ll
@@ -0,0 +1,70 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that we generate load instruction with (base + register offset << 0)
+
+; load word
+
+define i32 @load_w(i32* nocapture %a, i32 %n) nounwind {
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memw(r{{[0-9]+}}+r{{[0-9]+}}<<#0)
+entry:
+  %tmp = shl i32 %n, 4
+  %scevgep9 = getelementptr i32* %a, i32 %tmp
+  %val = load i32* %scevgep9, align 4
+  ret i32 %val
+}
+
+; load unsigned half word
+
+define i16 @load_uh(i16* nocapture %a, i32 %n) nounwind {
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memuh(r{{[0-9]+}}+r{{[0-9]+}}<<#0)
+entry:
+  %tmp = shl i32 %n, 4
+  %scevgep9 = getelementptr i16* %a, i32 %tmp
+  %val = load i16* %scevgep9, align 2
+  ret i16 %val
+}
+
+; load signed half word
+
+define i32 @load_h(i16* nocapture %a, i32 %n) nounwind {
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memh(r{{[0-9]+}}+r{{[0-9]+}}<<#0)
+entry:
+  %tmp = shl i32 %n, 4
+  %scevgep9 = getelementptr i16* %a, i32 %tmp
+  %val = load i16* %scevgep9, align 2
+  %conv = sext i16 %val to i32
+  ret i32 %conv
+}
+
+; load unsigned byte
+
+define i8 @load_ub(i8* nocapture %a, i32 %n) nounwind {
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memub(r{{[0-9]+}}+r{{[0-9]+}}<<#0)
+entry:
+  %tmp = shl i32 %n, 4
+  %scevgep9 = getelementptr i8* %a, i32 %tmp
+  %val = load i8* %scevgep9, align 1
+  ret i8 %val
+}
+
+; load signed byte
+
+define i32 @foo_2(i8* nocapture %a, i32 %n) nounwind {
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memb(r{{[0-9]+}}+r{{[0-9]+}}<<#0)
+entry:
+  %tmp = shl i32 %n, 4
+  %scevgep9 = getelementptr i8* %a, i32 %tmp
+  %val = load i8* %scevgep9, align 1
+  %conv = sext i8 %val to i32
+  ret i32 %conv
+}
+
+; load doubleword
+
+define i64 @load_d(i64* nocapture %a, i32 %n) nounwind {
+; CHECK: r{{[0-9]+}}:{{[0-9]+}}{{ *}}={{ *}}memd(r{{[0-9]+}}+r{{[0-9]+}}<<#0)
+entry:
+  %tmp = shl i32 %n, 4
+  %scevgep9 = getelementptr i64* %a, i32 %tmp
+  %val = load i64* %scevgep9, align 8
+  ret i64 %val
+}
diff --git a/test/CodeGen/Hexagon/indirect-br.ll b/test/CodeGen/Hexagon/indirect-br.ll
new file mode 100644
index 0000000000..919e501891
--- /dev/null
+++ b/test/CodeGen/Hexagon/indirect-br.ll
@@ -0,0 +1,14 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+;CHECK: jumpr  r{{[0-9]+}}
+
+define i32 @check_indirect_br(i8* %target) nounwind {
+entry:
+        indirectbr i8* %target, [label %test_label]
+
+test_label:
+        br label %ret
+
+ret:
+        ret i32 -1
+}
+\ No newline at end of file
diff --git a/test/CodeGen/Hexagon/postinc-store.ll b/test/CodeGen/Hexagon/postinc-store.ll
new file mode 100644
index 0000000000..99a3a58ad3
--- /dev/null
+++ b/test/CodeGen/Hexagon/postinc-store.ll
@@ -0,0 +1,29 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+
+; Check that post-increment store instructions are being generated.
+; CHECK: memw(r{{[0-9]+}}{{ *}}++{{ *}}#4{{ *}}){{ *}}={{ *}}r{{[0-9]+}}
+
+define i32 @sum(i32* nocapture %a, i16* nocapture %b, i32 %n) nounwind {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %lsr.iv = phi i32 [ %lsr.iv.next, %for.body ], [ 10, %entry ]
+  %arrayidx.phi = phi i32* [ %a, %entry ], [ %arrayidx.inc, %for.body ]
+  %arrayidx1.phi = phi i16* [ %b, %entry ], [ %arrayidx1.inc, %for.body ]
+  %0 = load i32* %arrayidx.phi, align 4
+  %1 = load i16* %arrayidx1.phi, align 2
+  %conv = sext i16 %1 to i32
+  %factor = mul i32 %0, 2
+  %add3 = add i32 %factor, %conv
+  store i32 %add3, i32* %arrayidx.phi, align 4
+
+  %arrayidx.inc = getelementptr i32* %arrayidx.phi, i32 1
+  %arrayidx1.inc = getelementptr i16* %arrayidx1.phi, i32 1
+  %lsr.iv.next = add i32 %lsr.iv, -1
+  %exitcond = icmp eq i32 %lsr.iv.next, 0
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret i32 0
+}
diff --git a/test/CodeGen/Hexagon/pred-absolute-store.ll b/test/CodeGen/Hexagon/pred-absolute-store.ll
new file mode 100644
index 0000000000..b1b09f414a
--- /dev/null
+++ b/test/CodeGen/Hexagon/pred-absolute-store.ll
@@ -0,0 +1,19 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that we are able to predicate instructions with abosolute
+; addressing mode.
+
+; CHECK: if{{ *}}(p{{[0-3]+}}){{ *}}memw(##gvar){{ *}}={{ *}}r{{[0-9]+}}
+
+@gvar = external global i32
+define i32 @test2(i32 %a, i32 %b) nounwind {
+entry:
+  %cmp = icmp eq i32 %a, %b
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  store i32 %a, i32* @gvar, align 4
+  br label %if.end
+
+if.end:
+  ret i32 %b
+}
diff --git a/test/CodeGen/Hexagon/predicate-copy.ll b/test/CodeGen/Hexagon/predicate-copy.ll
new file mode 100644
index 0000000000..552b687941
--- /dev/null
+++ b/test/CodeGen/Hexagon/predicate-copy.ll
@@ -0,0 +1,8 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 -O3 < %s | FileCheck %s
+
+; CHECK: r{{[0-9]+}} = p{{[0-9]+}}
+define i1 @foo() {
+entry:
+  ret i1 false
+}
+
diff --git a/test/CodeGen/Hexagon/struct_args.ll b/test/CodeGen/Hexagon/struct_args.ll
index e488f33c3d..f91300b506 100644
--- a/test/CodeGen/Hexagon/struct_args.ll
+++ b/test/CodeGen/Hexagon/struct_args.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
-; CHECK: r{{[0-9]}}:{{[0-9]}} = combine(r{{[0-9]}}, r{{[0-9]}})
+; CHECK: r{{[0-9]}}:{{[0-9]}} = combine({{r[0-9]|#0}}, r{{[0-9]}})
 ; CHECK: r{{[0-9]}}:{{[0-9]}} |= asl(r{{[0-9]}}:{{[0-9]}}, #32)
 
 %struct.small = type { i32, i32 }
diff --git a/test/CodeGen/Hexagon/sube.ll b/test/CodeGen/Hexagon/sube.ll
new file mode 100644
index 0000000000..84172e957d
--- /dev/null
+++ b/test/CodeGen/Hexagon/sube.ll
@@ -0,0 +1,29 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; CHECK: r{{[0-9]+:[0-9]+}} = #0
+; CHECK: r{{[0-9]+:[0-9]+}} = #1
+; CHECK: p{{[0-9]+}} = cmp.gtu(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}})
+; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}})
+; CHECK: r{{[0-9]+}} = mux(p{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}})
+; CHECK: r{{[0-9]+:[0-9]+}} = sub(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}})
+; CHECK: r{{[0-9]+:[0-9]+}} = sub(r{{[0-9]+:[0-9]+}}, r{{[0-9]+:[0-9]+}})
+; CHECK: r{{[0-9]+:[0-9]+}} = combine(r{{[0-9]+}}, r{{[0-9]+}})
+
+define void @check_sube_subc(i64 %AL, i64 %AH, i64 %BL, i64 %BH, i64* %RL, i64* %RH) {
+entry:
+        %tmp1 = zext i64 %AL to i128
+        %tmp23 = zext i64 %AH to i128
+        %tmp4 = shl i128 %tmp23, 64
+        %tmp5 = or i128 %tmp4, %tmp1
+        %tmp67 = zext i64 %BL to i128
+        %tmp89 = zext i64 %BH to i128
+        %tmp11 = shl i128 %tmp89, 64
+        %tmp12 = or i128 %tmp11, %tmp67
+        %tmp15 = sub i128 %tmp5, %tmp12
+        %tmp1617 = trunc i128 %tmp15 to i64
+        store i64 %tmp1617, i64* %RL
+        %tmp21 = lshr i128 %tmp15, 64
+        %tmp2122 = trunc i128 %tmp21 to i64
+        store i64 %tmp2122, i64* %RH
+        ret void
+}
+\ No newline at end of file
diff --git a/test/CodeGen/Hexagon/validate-offset.ll b/test/CodeGen/Hexagon/validate-offset.ll
new file mode 100644
index 0000000000..9e7d0aa078
--- /dev/null
+++ b/test/CodeGen/Hexagon/validate-offset.ll
@@ -0,0 +1,36 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s -O0
+
+; This is a regression test which makes sure that the offset check
+; is available for STRiw_indexed instruction. This is required
+; by 'Hexagon Expand Predicate Spill Code' pass.
+
+define i32 @f(i32 %a, i32 %b) nounwind {
+entry:
+  %retval = alloca i32, align 4
+  %a.addr = alloca i32, align 4
+  %b.addr = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 %b, i32* %b.addr, align 4
+  %0 = load i32* %a.addr, align 4
+  %1 = load i32* %b.addr, align 4
+  %cmp = icmp sgt i32 %0, %1
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  %2 = load i32* %a.addr, align 4
+  %3 = load i32* %b.addr, align 4
+  %add = add nsw i32 %2, %3
+  store i32 %add, i32* %retval
+  br label %return
+
+if.else:
+  %4 = load i32* %a.addr, align 4
+  %5 = load i32* %b.addr, align 4
+  %sub = sub nsw i32 %4, %5
+  store i32 %sub, i32* %retval
+  br label %return
+
+return:
+  %6 = load i32* %retval
+  ret i32 %6
+}
diff --git a/test/CodeGen/Hexagon/zextloadi1.ll b/test/CodeGen/Hexagon/zextloadi1.ll
new file mode 100644
index 0000000000..cb6e6fdf84
--- /dev/null
+++ b/test/CodeGen/Hexagon/zextloadi1.ll
@@ -0,0 +1,25 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+
+; CHECK: r{{[0-9]+}} = ##i129_l+16
+; CHECK: r{{[0-9]+}} = ##i129_s+16
+; CHECK: memd(##i129_s) = r{{[0-9]+:[0-9]+}}
+; CHECK: r{{[0-9]+}} = ##i65_l+8
+; CHECK: r{{[0-9]+}} = ##i65_s+8
+; CHECK: memd(##i65_s) = r{{[0-9]+:[0-9]+}}
+
+@i65_l = external global i65
+@i65_s = external global i65
+@i129_l = external global i129
+@i129_s = external global i129
+
+define void @i129_ls() nounwind  {
+        %tmp = load i129* @i129_l
+        store i129 %tmp, i129* @i129_s
+        ret void
+}
+
+define void @i65_ls() nounwind  {
+        %tmp = load i65* @i65_l
+        store i65 %tmp, i65* @i65_s
+        ret void
+}
+\ No newline at end of file