Apply upstream r183551, r183601, r183624 and r183794

Rename countTrailingZeros to the older CountTrailingZeros_32, mark as localmod. These patches fix correctness issues with ARM FastISel, and should make it faster while generating better code. BUG= none TEST= self R=jvoung@chromium.org Review URL: https://codereview.chromium.org/16712002
author: JF Bastien <jfb@chromium.org> 2013-06-11 15:49:41 -0700
committer: JF Bastien <jfb@chromium.org> 2013-06-11 15:49:41 -0700
commit: 797f498e639f7247aa738156e599ca1c9b02e161 (patch)
tree: 22cc892ae7a7d063f23b1f1693126bd28b9027e4
parent: e299775086069bfd51512e21ee74af59fabd9fac (diff)
10 files changed, 337 insertions, 91 deletions
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index b21c61af37..5fc052f44b 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -20,6 +20,7 @@
 #include "ARMSubtarget.h"
 #include "ARMTargetMachine.h"
 #include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/FastISel.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
@@ -1059,7 +1060,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
           useAM3 = true;
         }
       }
-      RC = &ARM::GPRRegClass;
+      RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
       break;
     case MVT::i16:
       if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
@@ -1074,7 +1075,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
         Opc = isZExt ? ARM::LDRH : ARM::LDRSH;
         useAM3 = true;
       }
-      RC = &ARM::GPRRegClass;
+      RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
       break;
     case MVT::i32:
       if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem())
@@ -1088,7 +1089,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
       } else {
         Opc = ARM::LDRi12;
       }
-      RC = &ARM::GPRRegClass;
+      RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
       break;
     case MVT::f32:
       if (!Subtarget->hasVFP2()) return false;
@@ -1097,7 +1098,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
         needVMOV = true;
         VT = MVT::i32;
         Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
-        RC = &ARM::GPRRegClass;
+        RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
       } else {
         Opc = ARM::VLDRS;
         RC = TLI.getRegClassFor(VT);
@@ -2019,7 +2020,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
       case CCValAssign::ZExt: {
         MVT DestVT = VA.getLocVT();
         Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/true);
-        assert (Arg != 0 && "Failed to emit a sext");
+        assert (Arg != 0 && "Failed to emit a zext");
         ArgVT = DestVT;
         break;
       }
@@ -2636,47 +2637,114 @@ unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
                                     bool isZExt) {
   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
     return 0;
+  if (SrcVT != MVT::i16 && SrcVT != MVT::i8 && SrcVT != MVT::i1)
+    return 0;
 
-  unsigned Opc;
-  bool isBoolZext = false;
-  const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::i32);
-  switch (SrcVT.SimpleTy) {
-  default: return 0;
-  case MVT::i16:
-    if (!Subtarget->hasV6Ops()) return 0;
-    RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
-    if (isZExt)
-      Opc = isThumb2 ? ARM::t2UXTH : ARM::UXTH;
-    else
-      Opc = isThumb2 ? ARM::t2SXTH : ARM::SXTH;
-    break;
-  case MVT::i8:
-    if (!Subtarget->hasV6Ops()) return 0;
-    RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
-    if (isZExt)
-      Opc = isThumb2 ? ARM::t2UXTB : ARM::UXTB;
-    else
-      Opc = isThumb2 ? ARM::t2SXTB : ARM::SXTB;
-    break;
-  case MVT::i1:
-    if (isZExt) {
-      RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass;
-      Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
-      isBoolZext = true;
-      break;
+  // Table of which combinations can be emitted as a single instruction,
+  // and which will require two.
+  static const uint8_t isSingleInstrTbl[3][2][2][2] = {
+    //            ARM                     Thumb
+    //           !hasV6Ops  hasV6Ops     !hasV6Ops  hasV6Ops
+    //    ext:     s  z      s  z          s  z      s  z
+    /*  1 */ { { { 0, 1 }, { 0, 1 } }, { { 0, 0 }, { 0, 1 } } },
+    /*  8 */ { { { 0, 1 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } },
+    /* 16 */ { { { 0, 0 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } }
+  };
+
+  // Target registers for:
+  //  - For ARM can never be PC.
+  //  - For 16-bit Thumb are restricted to lower 8 registers.
+  //  - For 32-bit Thumb are restricted to non-SP and non-PC.
+  static const TargetRegisterClass *RCTbl[2][2] = {
+    // Instructions: Two                     Single
+    /* ARM      */ { &ARM::GPRnopcRegClass, &ARM::GPRnopcRegClass },
+    /* Thumb    */ { &ARM::tGPRRegClass,    &ARM::rGPRRegClass    }
+  };
+
+  // Table governing the instruction(s) to be emitted.
+  static const struct {
+    // First entry for each of the following is sext, second zext.
+    uint16_t Opc[2];
+    uint8_t Imm[2];   // All instructions have either a shift or a mask.
+    uint8_t hasS[2];  // Some instructions have an S bit, always set it to 0.
+  } OpcTbl[2][2][3] = {
+    { // Two instructions (first is left shift, second is in this table).
+      { // ARM
+        /*  1 */ { { ARM::ASRi,   ARM::LSRi    }, {  31,  31 }, { 1, 1 } },
+        /*  8 */ { { ARM::ASRi,   ARM::LSRi    }, {  24,  24 }, { 1, 1 } },
+        /* 16 */ { { ARM::ASRi,   ARM::LSRi    }, {  16,  16 }, { 1, 1 } }
+      },
+      { // Thumb
+        /*  1 */ { { ARM::tASRri, ARM::tLSRri  }, {  31,  31 }, { 0, 0 } },
+        /*  8 */ { { ARM::tASRri, ARM::tLSRri  }, {  24,  24 }, { 0, 0 } },
+        /* 16 */ { { ARM::tASRri, ARM::tLSRri  }, {  16,  16 }, { 0, 0 } }
+      }
+    },
+    { // Single instruction.
+      { // ARM
+        /*  1 */ { { ARM::KILL,   ARM::ANDri   }, {   0,   1 }, { 0, 1 } },
+        /*  8 */ { { ARM::SXTB,   ARM::ANDri   }, {   0, 255 }, { 0, 1 } },
+        /* 16 */ { { ARM::SXTH,   ARM::UXTH    }, {   0,   0 }, { 0, 0 } }
+      },
+      { // Thumb
+        /*  1 */ { { ARM::KILL,   ARM::t2ANDri }, {   0,   1 }, { 0, 1 } },
+        /*  8 */ { { ARM::t2SXTB, ARM::t2ANDri }, {   0, 255 }, { 0, 1 } },
+        /* 16 */ { { ARM::t2SXTH, ARM::t2UXTH  }, {   0,   0 }, { 0, 0 } }
+      }
     }
-    return 0;
+  };
+
+  unsigned SrcBits = SrcVT.getSizeInBits();
+  unsigned DestBits = DestVT.getSizeInBits();
+  (void) DestBits;
+  assert((SrcBits < DestBits) && "can only extend to larger types");
+  assert((DestBits == 32 || DestBits == 16 || DestBits == 8) &&
+         "other sizes unimplemented");
+  assert((SrcBits == 16 || SrcBits == 8 || SrcBits == 1) &&
+         "other sizes unimplemented");
+
+  bool hasV6Ops = Subtarget->hasV6Ops();
+  // @LOCALMOD-START A further LLVM revision renames to countTrailingZeros.
+  unsigned Bitness = CountTrailingZeros_32(SrcBits) >> 1;  // {1,8,16}=>{0,1,2}
+  // @LOCALMOD-END
+  assert((Bitness < 3) && "sanity-check table bounds");
+
+  bool isSingleInstr = isSingleInstrTbl[Bitness][isThumb2][hasV6Ops][isZExt];
+  const TargetRegisterClass *RC = RCTbl[isThumb2][isSingleInstr];
+  unsigned Opc = OpcTbl[isSingleInstr][isThumb2][Bitness].Opc[isZExt];
+  assert(ARM::KILL != Opc && "Invalid table entry");
+  unsigned Imm = OpcTbl[isSingleInstr][isThumb2][Bitness].Imm[isZExt];
+  unsigned hasS = OpcTbl[isSingleInstr][isThumb2][Bitness].hasS[isZExt];
+
+  // 16-bit Thumb instructions always set CPSR (unless they're in an IT block).
+  bool setsCPSR = &ARM::tGPRRegClass == RC;
+  unsigned LSLOpc = isThumb2 ? ARM::tLSLri : ARM::LSLi;
+  unsigned ResultReg;
+
+  // Either one or two instructions are emitted.
+  // They're always of the form:
+  //   dst = in OP imm
+  // CPSR is set only by 16-bit Thumb instructions.
+  // Predicate, if any, is AL.
+  // S bit, if available, is always 0.
+  // When two are emitted the first's result will feed as the second's input,
+  // that value is then dead.
+  unsigned NumInstrsEmitted = isSingleInstr ? 1 : 2;
+  for (unsigned Instr = 0; Instr != NumInstrsEmitted; ++Instr) {
+    ResultReg = createResultReg(RC);
+    unsigned Opcode = ((0 == Instr) && !isSingleInstr) ? LSLOpc : Opc;
+    bool isKill = 1 == Instr;
+    MachineInstrBuilder MIB = BuildMI(
+        *FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opcode), ResultReg);
+    if (setsCPSR)
+      MIB.addReg(ARM::CPSR, RegState::Define);
+    AddDefaultPred(MIB.addReg(SrcReg, isKill * RegState::Kill).addImm(Imm));
+    if (hasS)
+      AddDefaultCC(MIB);
+    // Second instruction consumes the first's result.
+    SrcReg = ResultReg;
   }
 
-  unsigned ResultReg = createResultReg(RC);
-  MachineInstrBuilder MIB;
-  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
-        .addReg(SrcReg);
-  if (isBoolZext)
-    MIB.addImm(1);
-  else
-    MIB.addImm(0);
-  AddOptionalDefs(MIB);
   return ResultReg;
 }
 
@@ -2831,12 +2899,33 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
   return false;
 }
 
-/// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+namespace {
+// This table describes sign- and zero-extend instructions which can be
+// folded into a preceding load. All of these extends have an immediate
+// (sometimes a mask and sometimes a shift) that's applied after
+// extension.
+const struct FoldableLoadExtendsStruct {
+  uint16_t Opc[2];  // ARM, Thumb.
+  uint8_t ExpectedImm;
+  uint8_t isZExt     : 1;
+  uint8_t ExpectedVT : 7;
+} FoldableLoadExtends[] = {
+  { { ARM::SXTH,  ARM::t2SXTH  },   0, 0, MVT::i16 },
+  { { ARM::UXTH,  ARM::t2UXTH  },   0, 1, MVT::i16 },
+  { { ARM::ANDri, ARM::t2ANDri }, 255, 1, MVT::i8  },
+  { { ARM::SXTB,  ARM::t2SXTB  },   0, 0, MVT::i8  },
+  { { ARM::UXTB,  ARM::t2UXTB  },   0, 1, MVT::i8  }
+};
+}
+
+/// \brief The specified machine instr operand is a vreg, and that
 /// vreg is being provided by the specified load instruction.  If possible,
 /// try to fold the load as an operand to the instruction, returning true if
 /// successful.
+// @LOCALMOD-START Name of this function changed.
 bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
                                 const LoadInst *LI) {
+// @LOCALMOD-END
   // Verify we have a legal type before going any further.
   MVT VT;
   if (!isLoadTypeLegal(LI->getType(), VT))
@@ -2846,26 +2935,24 @@ bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
   // ldrb r1, [r0]       ldrb r1, [r0]
   // uxtb r2, r1     =>
   // mov  r3, r2         mov  r3, r1
-  bool isZExt = true;
-  switch(MI->getOpcode()) {
-    default: return false;
-    case ARM::SXTH:
-    case ARM::t2SXTH:
-      isZExt = false;
-    case ARM::UXTH:
-    case ARM::t2UXTH:
-      if (VT != MVT::i16)
-        return false;
-    break;
-    case ARM::SXTB:
-    case ARM::t2SXTB:
-      isZExt = false;
-    case ARM::UXTB:
-    case ARM::t2UXTB:
-      if (VT != MVT::i8)
-        return false;
-    break;
+  //
+  if (MI->getNumOperands() < 3 || !MI->getOperand(2).isImm())
+    return false;
+  const uint64_t Imm = MI->getOperand(2).getImm();
+
+  bool Found = false;
+  bool isZExt;
+  for (unsigned i = 0, e = array_lengthof(FoldableLoadExtends);
+       i != e; ++i) {
+    if (FoldableLoadExtends[i].Opc[isThumb2] == MI->getOpcode() &&
+        (uint64_t)FoldableLoadExtends[i].ExpectedImm == Imm &&
+        MVT((MVT::SimpleValueType)FoldableLoadExtends[i].ExpectedVT) == VT) {
+      Found = true;
+      isZExt = FoldableLoadExtends[i].isZExt;
+    }
   }
+  if (!Found) return false;
+
   // See if we can handle this address.
   Address Addr;
   if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false;
diff --git a/test/CodeGen/ARM/2013-05-31-char-shift-crash.ll b/test/CodeGen/ARM/2013-05-31-char-shift-crash.ll
new file mode 100644
index 0000000000..0130f7ab68
--- /dev/null
+++ b/test/CodeGen/ARM/2013-05-31-char-shift-crash.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -O0 -mtriple=armv4t--linux-eabi-android
+; RUN: llc < %s -O0 -mtriple=armv4t-unknown-linux
+; RUN: llc < %s -O0 -mtriple=armv5-unknown-linux
+
+; See http://llvm.org/bugs/show_bug.cgi?id=16178
+; ARMFastISel used to fail emitting sext/zext in pre-ARMv6.
+
+; Function Attrs: nounwind
+define arm_aapcscc void @f2(i8 signext %a) #0 {
+entry:
+  %a.addr = alloca i8, align 1
+  store i8 %a, i8* %a.addr, align 1
+  %0 = load i8* %a.addr, align 1
+  %conv = sext i8 %0 to i32
+  %shr = ashr i32 %conv, 56
+  %conv1 = trunc i32 %shr to i8
+  call arm_aapcscc void @f1(i8 signext %conv1)
+  ret void
+}
+
+declare arm_aapcscc void @f1(i8 signext) #1
diff --git a/test/CodeGen/ARM/fast-isel-call.ll b/test/CodeGen/ARM/fast-isel-call.ll
index 55911e5c1c..3833043ad7 100644
--- a/test/CodeGen/ARM/fast-isel-call.ll
+++ b/test/CodeGen/ARM/fast-isel-call.ll
@@ -51,9 +51,9 @@ define void @foo(i8 %a, i16 %b) nounwind {
 ; THUMB: sxtb	r2, r1
 ; THUMB: mov r0, r2
   %2 = call i32 @t1(i8 signext %a)
-; ARM: uxtb	r2, r1
+; ARM: and	r2, r1, #255
 ; ARM: mov r0, r2
-; THUMB: uxtb	r2, r1
+; THUMB: and	r2, r1, #255
 ; THUMB: mov r0, r2
   %3 = call i32 @t2(i8 zeroext %a)
 ; ARM: sxth	r2, r1
@@ -101,13 +101,13 @@ entry:
 ; ARM: movw [[R3:l?r[0-9]*]], #28
 ; ARM: movw [[R4:l?r[0-9]*]], #40
 ; ARM: movw [[R5:l?r[0-9]*]], #186
-; ARM: uxtb [[R0]], [[R0]]
-; ARM: uxtb [[R1]], [[R1]]
-; ARM: uxtb [[R2]], [[R2]]
-; ARM: uxtb [[R3]], [[R3]]
-; ARM: uxtb [[R4]], [[R4]]
+; ARM: and [[R0]], [[R0]], #255
+; ARM: and [[R1]], [[R1]], #255
+; ARM: and [[R2]], [[R2]], #255
+; ARM: and [[R3]], [[R3]], #255
+; ARM: and [[R4]], [[R4]], #255
 ; ARM: str [[R4]], [sp]
-; ARM: uxtb [[R4]], [[R5]]
+; ARM: and [[R4]], [[R5]], #255
 ; ARM: str [[R4]], [sp, #4]
 ; ARM: bl {{_?}}bar
 ; ARM-LONG: @t10
@@ -128,13 +128,13 @@ entry:
 ; THUMB: movt [[R4]], #0
 ; THUMB: movw [[R5:l?r[0-9]*]], #186
 ; THUMB: movt [[R5]], #0
-; THUMB: uxtb [[R0]], [[R0]]
-; THUMB: uxtb [[R1]], [[R1]]
-; THUMB: uxtb [[R2]], [[R2]]
-; THUMB: uxtb [[R3]], [[R3]]
-; THUMB: uxtb.w [[R4]], [[R4]]
+; THUMB: and [[R0]], [[R0]], #255
+; THUMB: and [[R1]], [[R1]], #255
+; THUMB: and [[R2]], [[R2]], #255
+; THUMB: and [[R3]], [[R3]], #255
+; THUMB: and [[R4]], [[R4]], #255
 ; THUMB: str.w [[R4]], [sp]
-; THUMB: uxtb.w [[R4]], [[R5]]
+; THUMB: and [[R4]], [[R5]], #255
 ; THUMB: str.w [[R4]], [sp, #4]
 ; THUMB: bl {{_?}}bar
 ; THUMB-LONG: @t10
diff --git a/test/CodeGen/ARM/fast-isel-conversion.ll b/test/CodeGen/ARM/fast-isel-conversion.ll
index 91034fb24f..e40891a2dd 100644
--- a/test/CodeGen/ARM/fast-isel-conversion.ll
+++ b/test/CodeGen/ARM/fast-isel-conversion.ll
@@ -131,11 +131,11 @@ entry:
 define void @uitofp_single_i8(i8 %a) nounwind ssp {
 entry:
 ; ARM: uitofp_single_i8
-; ARM: uxtb r0, r0
+; ARM: and r0, r0, #255
 ; ARM: vmov s0, r0
 ; ARM: vcvt.f32.u32 s0, s0
 ; THUMB: uitofp_single_i8
-; THUMB: uxtb r0, r0
+; THUMB: and r0, r0, #255
 ; THUMB: vmov s0, r0
 ; THUMB: vcvt.f32.u32 s0, s0
   %b.addr = alloca float, align 4
@@ -177,11 +177,11 @@ entry:
 define void @uitofp_double_i8(i8 %a, double %b) nounwind ssp {
 entry:
 ; ARM: uitofp_double_i8
-; ARM: uxtb r0, r0
+; ARM: and r0, r0, #255
 ; ARM: vmov s0, r0
 ; ARM: vcvt.f64.u32 d16, s0
 ; THUMB: uitofp_double_i8
-; THUMB: uxtb r0, r0
+; THUMB: and r0, r0, #255
 ; THUMB: vmov s0, r0
 ; THUMB: vcvt.f64.u32 d16, s0
   %b.addr = alloca double, align 8
diff --git a/test/CodeGen/ARM/fast-isel-ext.ll b/test/CodeGen/ARM/fast-isel-ext.ll
new file mode 100644
index 0000000000..cb6e9ba1a1
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-ext.ll
@@ -0,0 +1,134 @@
+; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=v7
+; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv4t-apple-ios | FileCheck %s --check-prefix=prev6
+; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv5-apple-ios | FileCheck %s --check-prefix=prev6
+; RUN: llc < %s -O0 -fast-isel-abort -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=v7
+
+; Can't test pre-ARMv6 Thumb because ARM FastISel currently only supports
+; Thumb2. The ARMFastISel::ARMEmitIntExt code should work for Thumb by always
+; using two shifts.
+
+; Note that lsl, asr and lsr in Thumb are all encoded as 16-bit instructions
+; and therefore must set flags. {{s?}} below denotes this, instead of
+; duplicating tests.
+
+; zext
+
+define i8 @zext_1_8(i1 %a) nounwind ssp {
+; v7: zext_1_8:
+; v7: and r0, r0, #1
+; prev6: zext_1_8:
+; prev6: and r0, r0, #1
+  %r = zext i1 %a to i8
+  ret i8 %r
+}
+
+define i16 @zext_1_16(i1 %a) nounwind ssp {
+; v7: zext_1_16:
+; v7: and r0, r0, #1
+; prev6: zext_1_16:
+; prev6: and r0, r0, #1
+  %r = zext i1 %a to i16
+  ret i16 %r
+}
+
+define i32 @zext_1_32(i1 %a) nounwind ssp {
+; v7: zext_1_32:
+; v7: and r0, r0, #1
+; prev6: zext_1_32:
+; prev6: and r0, r0, #1
+  %r = zext i1 %a to i32
+  ret i32 %r
+}
+
+define i16 @zext_8_16(i8 %a) nounwind ssp {
+; v7: zext_8_16:
+; v7: and r0, r0, #255
+; prev6: zext_8_16:
+; prev6: and r0, r0, #255
+  %r = zext i8 %a to i16
+  ret i16 %r
+}
+
+define i32 @zext_8_32(i8 %a) nounwind ssp {
+; v7: zext_8_32:
+; v7: and r0, r0, #255
+; prev6: zext_8_32:
+; prev6: and r0, r0, #255
+  %r = zext i8 %a to i32
+  ret i32 %r
+}
+
+define i32 @zext_16_32(i16 %a) nounwind ssp {
+; v7: zext_16_32:
+; v7: uxth r0, r0
+; prev6: zext_16_32:
+; prev6: lsl{{s?}} r0, r0, #16
+; prev6: lsr{{s?}} r0, r0, #16
+  %r = zext i16 %a to i32
+  ret i32 %r
+}
+
+; sext
+
+define i8 @sext_1_8(i1 %a) nounwind ssp {
+; v7: sext_1_8:
+; v7: lsl{{s?}} r0, r0, #31
+; v7: asr{{s?}} r0, r0, #31
+; prev6: sext_1_8:
+; prev6: lsl{{s?}} r0, r0, #31
+; prev6: asr{{s?}} r0, r0, #31
+  %r = sext i1 %a to i8
+  ret i8 %r
+}
+
+define i16 @sext_1_16(i1 %a) nounwind ssp {
+; v7: sext_1_16:
+; v7: lsl{{s?}} r0, r0, #31
+; v7: asr{{s?}} r0, r0, #31
+; prev6: sext_1_16:
+; prev6: lsl{{s?}} r0, r0, #31
+; prev6: asr{{s?}} r0, r0, #31
+  %r = sext i1 %a to i16
+  ret i16 %r
+}
+
+define i32 @sext_1_32(i1 %a) nounwind ssp {
+; v7: sext_1_32:
+; v7: lsl{{s?}} r0, r0, #31
+; v7: asr{{s?}} r0, r0, #31
+; prev6: sext_1_32:
+; prev6: lsl{{s?}} r0, r0, #31
+; prev6: asr{{s?}} r0, r0, #31
+  %r = sext i1 %a to i32
+  ret i32 %r
+}
+
+define i16 @sext_8_16(i8 %a) nounwind ssp {
+; v7: sext_8_16:
+; v7: sxtb r0, r0
+; prev6: sext_8_16:
+; prev6: lsl{{s?}} r0, r0, #24
+; prev6: asr{{s?}} r0, r0, #24
+  %r = sext i8 %a to i16
+  ret i16 %r
+}
+
+define i32 @sext_8_32(i8 %a) nounwind ssp {
+; v7: sext_8_32:
+; v7: sxtb r0, r0
+; prev6: sext_8_32:
+; prev6: lsl{{s?}} r0, r0, #24
+; prev6: asr{{s?}} r0, r0, #24
+  %r = sext i8 %a to i32
+  ret i32 %r
+}
+
+define i32 @sext_16_32(i16 %a) nounwind ssp {
+; v7: sext_16_32:
+; v7: sxth r0, r0
+; prev6: sext_16_32:
+; prev6: lsl{{s?}} r0, r0, #16
+; prev6: asr{{s?}} r0, r0, #16
+  %r = sext i16 %a to i32
+  ret i32 %r
+}
diff --git a/test/CodeGen/ARM/fast-isel-fold.ll b/test/CodeGen/ARM/fast-isel-fold.ll
index 38e1f884bf..e8ed8cbf34 100644
--- a/test/CodeGen/ARM/fast-isel-fold.ll
+++ b/test/CodeGen/ARM/fast-isel-fold.ll
@@ -9,9 +9,11 @@ define void @t1() nounwind uwtable ssp {
 ; ARM: t1
 ; ARM: ldrb
 ; ARM-NOT: uxtb
+; ARM-NOT: and{{.*}}, #255
 ; THUMB: t1
 ; THUMB: ldrb
 ; THUMB-NOT: uxtb
+; THUMB-NOT: and{{.*}}, #255
   %1 = load i8* @a, align 1
   call void @foo1(i8 zeroext %1)
   ret void
@@ -36,9 +38,11 @@ define i32 @t3() nounwind uwtable ssp {
 ; ARM: t3
 ; ARM: ldrb
 ; ARM-NOT: uxtb
+; ARM-NOT: and{{.*}}, #255
 ; THUMB: t3
 ; THUMB: ldrb
 ; THUMB-NOT: uxtb
+; THUMB-NOT: and{{.*}}, #255
   %1 = load i8* @a, align 1
   %2 = zext i8 %1 to i32
   ret i32 %2
diff --git a/test/CodeGen/ARM/fast-isel-icmp.ll b/test/CodeGen/ARM/fast-isel-icmp.ll
index 04a92825af..3dc1109165 100644
--- a/test/CodeGen/ARM/fast-isel-icmp.ll
+++ b/test/CodeGen/ARM/fast-isel-icmp.ll
@@ -50,12 +50,12 @@ entry:
 define i32 @icmp_i8_unsigned(i8 %a, i8 %b) nounwind {
 entry:
 ; ARM: icmp_i8_unsigned
-; ARM: uxtb r0, r0
-; ARM: uxtb r1, r1
+; ARM: and r0, r0, #255
+; ARM: and r1, r1, #255
 ; ARM: cmp r0, r1
 ; THUMB: icmp_i8_unsigned
-; THUMB: uxtb r0, r0
-; THUMB: uxtb r1, r1
+; THUMB: and r0, r0, #255
+; THUMB: and r1, r1, #255
 ; THUMB: cmp r0, r1
   %cmp = icmp ugt i8 %a, %b
   %conv2 = zext i1 %cmp to i32
diff --git a/test/CodeGen/ARM/fast-isel-intrinsic.ll b/test/CodeGen/ARM/fast-isel-intrinsic.ll
index f92a702960..bcdcdeb1ea 100644
--- a/test/CodeGen/ARM/fast-isel-intrinsic.ll
+++ b/test/CodeGen/ARM/fast-isel-intrinsic.ll
@@ -19,7 +19,7 @@ define void @t1() nounwind ssp {
 ; ARM: add r0, r0, #5
 ; ARM: movw r1, #64
 ; ARM: movw r2, #10
-; ARM: uxtb r1, r1
+; ARM: and r1, r1, #255
 ; ARM: bl {{_?}}memset
 ; ARM-LONG: t1
 ; ARM-LONG: {{(movw r3, :lower16:L_memset\$non_lazy_ptr)|(ldr r3, .LCPI)}}
@@ -34,7 +34,7 @@ define void @t1() nounwind ssp {
 ; THUMB: movt r1, #0
 ; THUMB: movs r2, #10
 ; THUMB: movt r2, #0
-; THUMB: uxtb r1, r1
+; THUMB: and r1, r1, #255
 ; THUMB: bl {{_?}}memset
 ; THUMB-LONG: t1
 ; THUMB-LONG: {{(movw r0, :lower16:_?message1)|(ldr.n r0, .LCPI)}}
diff --git a/test/CodeGen/ARM/fast-isel-ret.ll b/test/CodeGen/ARM/fast-isel-ret.ll
index a7d271a94c..ba5412c4f1 100644
--- a/test/CodeGen/ARM/fast-isel-ret.ll
+++ b/test/CodeGen/ARM/fast-isel-ret.ll
@@ -27,7 +27,7 @@ entry:
 define zeroext i8 @ret3(i8 signext %a) nounwind uwtable ssp {
 entry:
 ; CHECK: ret3
-; CHECK: uxtb r0, r0
+; CHECK: and r0, r0, #255
 ; CHECK: bx lr
   ret i8 %a
 }
diff --git a/test/CodeGen/ARM/fast-isel.ll b/test/CodeGen/ARM/fast-isel.ll
index c4274c5eb5..f877e78c6e 100644
--- a/test/CodeGen/ARM/fast-isel.ll
+++ b/test/CodeGen/ARM/fast-isel.ll
@@ -80,12 +80,12 @@ bb1:
 
 ; THUMB: and
 ; THUMB: strb
-; THUMB: uxtb
+; THUMB: and{{.*}}, #255
 ; THUMB: strh
 ; THUMB: uxth
 ; ARM: and
 ; ARM: strb
-; ARM: uxtb
+; ARM: and{{.*}}, #255
 ; ARM: strh
 ; ARM: uxth
 
@@ -121,13 +121,13 @@ bb3:
 
 ; THUMB: ldrb
 ; THUMB: ldrh
-; THUMB: uxtb
+; THUMB: and{{.*}}, #255
 ; THUMB: sxth
 ; THUMB: add
 ; THUMB: sub
 ; ARM: ldrb
 ; ARM: ldrh
-; ARM: uxtb
+; ARM: and{{.*}}, #255
 ; ARM: sxth
 ; ARM: add
 ; ARM: sub
author	JF Bastien <jfb@chromium.org>	2013-06-11 15:49:41 -0700
committer	JF Bastien <jfb@chromium.org>	2013-06-11 15:49:41 -0700
commit	797f498e639f7247aa738156e599ca1c9b02e161 (patch)
tree	22cc892ae7a7d063f23b1f1693126bd28b9027e4
parent	e299775086069bfd51512e21ee74af59fabd9fac (diff)