34 files changed, 2543 insertions, 1963 deletions
diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt
index 47baef6696..7da2fed4cd 100644
--- a/lib/Target/NVPTX/CMakeLists.txt
+++ b/lib/Target/NVPTX/CMakeLists.txt
@@ -22,6 +22,7 @@ set(NVPTXCodeGen_sources
   NVPTXAllocaHoisting.cpp
   NVPTXAsmPrinter.cpp
   NVPTXUtilities.cpp
+  NVVMReflect.cpp
   )
 
 add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources})
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
index 454583850b..b3e8b5d262 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
@@ -52,25 +52,24 @@ enum PropertyAnnotation {
 };
 
 const unsigned AnnotationNameLen = 8; // length of each annotation name
-const char
-PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = {
-  "maxntidx",               // PROPERTY_MAXNTID_X
-  "maxntidy",               // PROPERTY_MAXNTID_Y
-  "maxntidz",               // PROPERTY_MAXNTID_Z
-  "reqntidx",               // PROPERTY_REQNTID_X
-  "reqntidy",               // PROPERTY_REQNTID_Y
-  "reqntidz",               // PROPERTY_REQNTID_Z
-  "minctasm",               // PROPERTY_MINNCTAPERSM
-  "texture",                // PROPERTY_ISTEXTURE
-  "surface",                // PROPERTY_ISSURFACE
-  "sampler",                // PROPERTY_ISSAMPLER
-  "rdoimage",               // PROPERTY_ISREADONLY_IMAGE_PARAM
-  "wroimage",               // PROPERTY_ISWRITEONLY_IMAGE_PARAM
-  "kernel",                 // PROPERTY_ISKERNEL_FUNCTION
-  "align",                  // PROPERTY_ALIGN
+const char PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = {
+  "maxntidx",                         // PROPERTY_MAXNTID_X
+  "maxntidy",                         // PROPERTY_MAXNTID_Y
+  "maxntidz",                         // PROPERTY_MAXNTID_Z
+  "reqntidx",                         // PROPERTY_REQNTID_X
+  "reqntidy",                         // PROPERTY_REQNTID_Y
+  "reqntidz",                         // PROPERTY_REQNTID_Z
+  "minctasm",                         // PROPERTY_MINNCTAPERSM
+  "texture",                          // PROPERTY_ISTEXTURE
+  "surface",                          // PROPERTY_ISSURFACE
+  "sampler",                          // PROPERTY_ISSAMPLER
+  "rdoimage",                         // PROPERTY_ISREADONLY_IMAGE_PARAM
+  "wroimage",                         // PROPERTY_ISWRITEONLY_IMAGE_PARAM
+  "kernel",                           // PROPERTY_ISKERNEL_FUNCTION
+  "align",                            // PROPERTY_ALIGN
 
-  // last property
-  "proplast",               // PROPERTY_LAST
+              // last property
+  "proplast", // PROPERTY_LAST
 };
 
 // name of named metadata used for global annotations
@@ -80,9 +79,8 @@ PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = {
 // compiling those .cpp files, hence __attribute__((unused)).
 __attribute__((unused))
 #endif
-static const char* NamedMDForAnnotations = "nvvm.annotations";
+    static const char *NamedMDForAnnotations = "nvvm.annotations";
 
 }
 
-
 #endif
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
index 619181994a..459cd96cb0 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
@@ -23,10 +23,9 @@ bool CompileForDebugging;
 // compile for debugging
 static cl::opt<bool, true>
 Debug("debug-compile", cl::desc("Compile for debugging"), cl::Hidden,
-      cl::location(CompileForDebugging),
-      cl::init(false));
+      cl::location(CompileForDebugging), cl::init(false));
 
-void NVPTXMCAsmInfo::anchor() { }
+void NVPTXMCAsmInfo::anchor() {}
 
 NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Target &T, const StringRef &TT) {
   Triple TheTriple(TT);
@@ -55,7 +54,7 @@ NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Target &T, const StringRef &TT) {
   Data32bitsDirective = " .b32 ";
   Data64bitsDirective = " .b64 ";
   PrivateGlobalPrefix = "";
-  ZeroDirective =  " .b8";
+  ZeroDirective = " .b8";
   AsciiDirective = " .b8";
   AscizDirective = " .b8";
 
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
index 44aa01ca6e..ccd29705df 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
@@ -28,7 +28,6 @@
 #define GET_REGINFO_MC_DESC
 #include "NVPTXGenRegisterInfo.inc"
 
-
 using namespace llvm;
 
 static MCInstrInfo *createNVPTXMCInstrInfo() {
@@ -44,22 +43,20 @@ static MCRegisterInfo *createNVPTXMCRegisterInfo(StringRef TT) {
   return X;
 }
 
-static MCSubtargetInfo *createNVPTXMCSubtargetInfo(StringRef TT, StringRef CPU,
-                                                   StringRef FS) {
+static MCSubtargetInfo *
+createNVPTXMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) {
   MCSubtargetInfo *X = new MCSubtargetInfo();
   InitNVPTXMCSubtargetInfo(X, TT, CPU, FS);
   return X;
 }
 
-static MCCodeGenInfo *createNVPTXMCCodeGenInfo(StringRef TT, Reloc::Model RM,
-                                               CodeModel::Model CM,
-                                               CodeGenOpt::Level OL) {
+static MCCodeGenInfo *createNVPTXMCCodeGenInfo(
+    StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) {
   MCCodeGenInfo *X = new MCCodeGenInfo();
   X->InitMCCodeGenInfo(RM, CM, OL);
   return X;
 }
 
-
 // Force static initialization.
 extern "C" void LLVMInitializeNVPTXTargetMC() {
   // Register the MC asm info.
diff --git a/lib/Target/NVPTX/ManagedStringPool.h b/lib/Target/NVPTX/ManagedStringPool.h
index b5684883fc..d6c79b5110 100644
--- a/lib/Target/NVPTX/ManagedStringPool.h
+++ b/lib/Target/NVPTX/ManagedStringPool.h
@@ -12,7 +12,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-
 #ifndef LLVM_SUPPORT_MANAGED_STRING_H
 #define LLVM_SUPPORT_MANAGED_STRING_H
 
diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h
index b46ea881c4..6a53a443bf 100644
--- a/lib/Target/NVPTX/NVPTX.h
+++ b/lib/Target/NVPTX/NVPTX.h
@@ -41,18 +41,24 @@ enum CondCodes {
 
 inline static const char *NVPTXCondCodeToString(NVPTXCC::CondCodes CC) {
   switch (CC) {
-  case NVPTXCC::NE:  return "ne";
-  case NVPTXCC::EQ:   return "eq";
-  case NVPTXCC::LT:   return "lt";
-  case NVPTXCC::LE:  return "le";
-  case NVPTXCC::GT:  return "gt";
-  case NVPTXCC::GE:   return "ge";
+  case NVPTXCC::NE:
+    return "ne";
+  case NVPTXCC::EQ:
+    return "eq";
+  case NVPTXCC::LT:
+    return "lt";
+  case NVPTXCC::LE:
+    return "le";
+  case NVPTXCC::GT:
+    return "gt";
+  case NVPTXCC::GE:
+    return "ge";
   }
   llvm_unreachable("Unknown condition code");
 }
 
-FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
-                                 llvm::CodeGenOpt::Level OptLevel);
+FunctionPass *
+createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel);
 FunctionPass *createLowerStructArgsPass(NVPTXTargetMachine &);
 FunctionPass *createNVPTXReMatPass(NVPTXTargetMachine &);
 FunctionPass *createNVPTXReMatBlockPass(NVPTXTargetMachine &);
@@ -62,8 +68,7 @@ bool isImageOrSamplerVal(const Value *, const Module *);
 extern Target TheNVPTXTarget32;
 extern Target TheNVPTXTarget64;
 
-namespace NVPTX
-{
+namespace NVPTX {
 enum DrvInterface {
   NVCL,
   CUDA,
@@ -102,7 +107,7 @@ enum LoadStore {
 };
 
 namespace PTXLdStInstCode {
-enum AddressSpace{
+enum AddressSpace {
   GENERIC = 0,
   GLOBAL = 1,
   CONSTANT = 2,
diff --git a/lib/Target/NVPTX/NVPTX.td b/lib/Target/NVPTX/NVPTX.td
index 7aee3595c6..d78b4e81a3 100644
--- a/lib/Target/NVPTX/NVPTX.td
+++ b/lib/Target/NVPTX/NVPTX.td
@@ -26,14 +26,6 @@ include "NVPTXInstrInfo.td"
 //===----------------------------------------------------------------------===//
 
 // SM Versions
-def SM10 : SubtargetFeature<"sm_10", "SmVersion", "10",
-                            "Target SM 1.0">;
-def SM11 : SubtargetFeature<"sm_11", "SmVersion", "11",
-                            "Target SM 1.1">;
-def SM12 : SubtargetFeature<"sm_12", "SmVersion", "12",
-                            "Target SM 1.2">;
-def SM13 : SubtargetFeature<"sm_13", "SmVersion", "13",
-                            "Target SM 1.3">;
 def SM20 : SubtargetFeature<"sm_20", "SmVersion", "20",
                             "Target SM 2.0">;
 def SM21 : SubtargetFeature<"sm_21", "SmVersion", "21",
@@ -56,10 +48,6 @@ def PTX31 : SubtargetFeature<"ptx31", "PTXVersion", "31",
 class Proc<string Name, list<SubtargetFeature> Features>
  : Processor<Name, NoItineraries, Features>;
 
-def : Proc<"sm_10", [SM10]>;
-def : Proc<"sm_11", [SM11]>;
-def : Proc<"sm_12", [SM12]>;
-def : Proc<"sm_13", [SM13]>;
 def : Proc<"sm_20", [SM20]>;
 def : Proc<"sm_21", [SM21]>;
 def : Proc<"sm_30", [SM30]>;
diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
index 60f52a46da..0f792ec682 100644
--- a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
+++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
@@ -19,9 +19,9 @@
 namespace llvm {
 
 bool NVPTXAllocaHoisting::runOnFunction(Function &function) {
-  bool               functionModified    = false;
-  Function::iterator I                   = function.begin();
-  TerminatorInst    *firstTerminatorInst = (I++)->getTerminator();
+  bool functionModified = false;
+  Function::iterator I = function.begin();
+  TerminatorInst *firstTerminatorInst = (I++)->getTerminator();
 
   for (Function::iterator E = function.end(); I != E; ++I) {
     for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) {
@@ -37,12 +37,10 @@ bool NVPTXAllocaHoisting::runOnFunction(Function &function) {
 }
 
 char NVPTXAllocaHoisting::ID = 1;
-RegisterPass<NVPTXAllocaHoisting> X("alloca-hoisting",
-                                    "Hoisting alloca instructions in non-entry "
-                                    "blocks to the entry block");
+RegisterPass<NVPTXAllocaHoisting>
+X("alloca-hoisting", "Hoisting alloca instructions in non-entry "
+                     "blocks to the entry block");
 
-FunctionPass *createAllocaHoisting() {
-  return new NVPTXAllocaHoisting();
-}
+FunctionPass *createAllocaHoisting() { return new NVPTXAllocaHoisting(); }
 
 } // end namespace llvm
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 0115e1f5d3..ce5d78afa3 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -47,7 +47,6 @@
 #include <sstream>
 using namespace llvm;
 
-
 #include "NVPTXGenAsmWriter.inc"
 
 bool RegAllocNilUsed = true;
@@ -59,21 +58,17 @@ EmitLineNumbers("nvptx-emit-line-numbers",
                 cl::desc("NVPTX Specific: Emit Line numbers even without -G"),
                 cl::init(true));
 
-namespace llvm  {
-bool InterleaveSrcInPtx = false;
-}
-
-static cl::opt<bool, true>InterleaveSrc("nvptx-emit-src",
-                                        cl::ZeroOrMore,
-                       cl::desc("NVPTX Specific: Emit source line in ptx file"),
-                                        cl::location(llvm::InterleaveSrcInPtx));
+namespace llvm { bool InterleaveSrcInPtx = false; }
 
+static cl::opt<bool, true>
+InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore,
+              cl::desc("NVPTX Specific: Emit source line in ptx file"),
+              cl::location(llvm::InterleaveSrcInPtx));
 
 namespace {
 /// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
 /// depends.
-void DiscoverDependentGlobals(Value *V,
-                              DenseSet<GlobalVariable*> &Globals) {
+void DiscoverDependentGlobals(Value *V, DenseSet<GlobalVariable *> &Globals) {
   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
     Globals.insert(GV);
   else {
@@ -88,12 +83,12 @@ void DiscoverDependentGlobals(Value *V,
 /// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
 /// instances to be emitted, but only after any dependents have been added
 /// first.
-void VisitGlobalVariableForEmission(GlobalVariable *GV,
-                                    SmallVectorImpl<GlobalVariable*> &Order,
-                                    DenseSet<GlobalVariable*> &Visited,
-                                    DenseSet<GlobalVariable*> &Visiting) {
+void VisitGlobalVariableForEmission(
+    GlobalVariable *GV, SmallVectorImpl<GlobalVariable *> &Order,
+    DenseSet<GlobalVariable *> &Visited, DenseSet<GlobalVariable *> &Visiting) {
   // Have we already visited this one?
-  if (Visited.count(GV)) return;
+  if (Visited.count(GV))
+    return;
 
   // Do we have a circular dependency?
   if (Visiting.count(GV))
@@ -103,12 +98,13 @@ void VisitGlobalVariableForEmission(GlobalVariable *GV,
   Visiting.insert(GV);
 
   // Make sure we visit all dependents first
-  DenseSet<GlobalVariable*> Others;
+  DenseSet<GlobalVariable *> Others;
   for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
     DiscoverDependentGlobals(GV->getOperand(i), Others);
-  
-  for (DenseSet<GlobalVariable*>::iterator I = Others.begin(),
-       E = Others.end(); I != E; ++I)
+
+  for (DenseSet<GlobalVariable *>::iterator I = Others.begin(),
+                                            E = Others.end();
+       I != E; ++I)
     VisitGlobalVariableForEmission(*I, Order, Visited, Visiting);
 
   // Now we can visit ourself
@@ -142,25 +138,23 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
   if (CE == 0)
     llvm_unreachable("Unknown constant value to lower!");
 
-
   switch (CE->getOpcode()) {
   default:
     // If the code isn't optimized, there may be outstanding folding
     // opportunities. Attempt to fold the expression using DataLayout as a
     // last resort before giving up.
-    if (Constant *C =
-        ConstantFoldConstantExpression(CE, AP.TM.getDataLayout()))
+    if (Constant *C = ConstantFoldConstantExpression(CE, AP.TM.getDataLayout()))
       if (C != CE)
         return LowerConstant(C, AP);
 
     // Otherwise report the problem to the user.
     {
-        std::string S;
-        raw_string_ostream OS(S);
-        OS << "Unsupported expression in static initializer: ";
-        WriteAsOperand(OS, CE, /*PrintType=*/false,
-                       !AP.MF ? 0 : AP.MF->getFunction()->getParent());
-        report_fatal_error(OS.str());
+      std::string S;
+      raw_string_ostream OS(S);
+      OS << "Unsupported expression in static initializer: ";
+      WriteAsOperand(OS, CE, /*PrintType=*/ false,
+                     !AP.MF ? 0 : AP.MF->getFunction()->getParent());
+      report_fatal_error(OS.str());
     }
   case Instruction::GetElementPtr: {
     const DataLayout &TD = *AP.TM.getDataLayout();
@@ -182,7 +176,7 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
     // expression properly.  This is important for differences between
     // blockaddress labels.  Since the two labels are in the same function, it
     // is reasonable to treat their delta as a 32-bit value.
-    // FALL THROUGH.
+  // FALL THROUGH.
   case Instruction::BitCast:
     return LowerConstant(CE->getOperand(0), AP);
 
@@ -192,7 +186,7 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
     // integer type.  This promotes constant folding and simplifies this code.
     Constant *Op = CE->getOperand(0);
     Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()),
-                                      false/*ZExt*/);
+                                      false /*ZExt*/);
     return LowerConstant(Op, AP);
   }
 
@@ -214,11 +208,12 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
     // the high bits so we are sure to get a proper truncation if the input is
     // a constant expr.
     unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType());
-    const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx);
+    const MCExpr *MaskExpr =
+        MCConstantExpr::Create(~0ULL >> (64 - InBits), Ctx);
     return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx);
   }
 
-  // The MC library also has a right-shift operator, but it isn't consistently
+    // The MC library also has a right-shift operator, but it isn't consistently
   // signed or unsigned between different targets.
   case Instruction::Add:
   case Instruction::Sub:
@@ -232,24 +227,32 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
     const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP);
     const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP);
     switch (CE->getOpcode()) {
-    default: llvm_unreachable("Unknown binary operator constant cast expr");
-    case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
-    case Instruction::Sub: return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
-    case Instruction::Mul: return MCBinaryExpr::CreateMul(LHS, RHS, Ctx);
-    case Instruction::SDiv: return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx);
-    case Instruction::SRem: return MCBinaryExpr::CreateMod(LHS, RHS, Ctx);
-    case Instruction::Shl: return MCBinaryExpr::CreateShl(LHS, RHS, Ctx);
-    case Instruction::And: return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx);
-    case Instruction::Or:  return MCBinaryExpr::CreateOr (LHS, RHS, Ctx);
-    case Instruction::Xor: return MCBinaryExpr::CreateXor(LHS, RHS, Ctx);
+    default:
+      llvm_unreachable("Unknown binary operator constant cast expr");
+    case Instruction::Add:
+      return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
+    case Instruction::Sub:
+      return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
+    case Instruction::Mul:
+      return MCBinaryExpr::CreateMul(LHS, RHS, Ctx);
+    case Instruction::SDiv:
+      return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx);
+    case Instruction::SRem:
+      return MCBinaryExpr::CreateMod(LHS, RHS, Ctx);
+    case Instruction::Shl:
+      return MCBinaryExpr::CreateShl(LHS, RHS, Ctx);
+    case Instruction::And:
+      return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx);
+    case Instruction::Or:
+      return MCBinaryExpr::CreateOr(LHS, RHS, Ctx);
+    case Instruction::Xor:
+      return MCBinaryExpr::CreateXor(LHS, RHS, Ctx);
     }
   }
   }
 }
 
-
-void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI)
-{
+void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) {
   if (!EmitLineNumbers)
     return;
   if (ignoreLoc(MI))
@@ -268,7 +271,6 @@ void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI)
   if (curLoc.isUnknown())
     return;
 
-
   const MachineFunction *MF = MI.getParent()->getParent();
   //const TargetMachine &TM = MF->getTarget();
 
@@ -289,14 +291,13 @@ void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI)
   if (filenameMap.find(fileName.str()) == filenameMap.end())
     return;
 
-
   // Emit the line from the source file.
   if (llvm::InterleaveSrcInPtx)
     this->emitSrcInText(fileName.str(), curLoc.getLine());
 
   std::stringstream temp;
-  temp << "\t.loc " << filenameMap[fileName.str()]
-       << " " << curLoc.getLine() << " " << curLoc.getCol();
+  temp << "\t.loc " << filenameMap[fileName.str()] << " " << curLoc.getLine()
+       << " " << curLoc.getCol();
   OutStreamer.EmitRawText(Twine(temp.str().c_str()));
 }
 
@@ -309,9 +310,7 @@ void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
   OutStreamer.EmitRawText(OS.str());
 }
 
-void NVPTXAsmPrinter::printReturnValStr(const Function *F,
-                                        raw_ostream &O)
-{
+void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
   const DataLayout *TD = TM.getDataLayout();
   const TargetLowering *TLI = TM.getTargetLowering();
 
@@ -329,53 +328,49 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F,
       unsigned size = 0;
       if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) {
         size = ITy->getBitWidth();
-        if (size < 32) size = 32;
+        if (size < 32)
+          size = 32;
       } else {
-        assert(Ty->isFloatingPointTy() &&
-               "Floating point type expected here");
+        assert(Ty->isFloatingPointTy() && "Floating point type expected here");
         size = Ty->getPrimitiveSizeInBits();
       }
 
       O << ".param .b" << size << " func_retval0";
-    }
-    else if (isa<PointerType>(Ty)) {
+    } else if (isa<PointerType>(Ty)) {
       O << ".param .b" << TLI->getPointerTy().getSizeInBits()
-            << " func_retval0";
+        << " func_retval0";
     } else {
-      if ((Ty->getTypeID() == Type::StructTyID) ||
-          isa<VectorType>(Ty)) {
+      if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) {
         SmallVector<EVT, 16> vtparts;
         ComputeValueVTs(*TLI, Ty, vtparts);
         unsigned totalsz = 0;
-        for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
+        for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
           unsigned elems = 1;
           EVT elemtype = vtparts[i];
           if (vtparts[i].isVector()) {
             elems = vtparts[i].getVectorNumElements();
             elemtype = vtparts[i].getVectorElementType();
           }
-          for (unsigned j=0, je=elems; j!=je; ++j) {
+          for (unsigned j = 0, je = elems; j != je; ++j) {
             unsigned sz = elemtype.getSizeInBits();
-            if (elemtype.isInteger() && (sz < 8)) sz = 8;
-            totalsz += sz/8;
+            if (elemtype.isInteger() && (sz < 8))
+              sz = 8;
+            totalsz += sz / 8;
           }
         }
         unsigned retAlignment = 0;
         if (!llvm::getAlign(*F, 0, retAlignment))
           retAlignment = TD->getABITypeAlignment(Ty);
-        O << ".param .align "
-            << retAlignment
-            << " .b8 func_retval0["
-            << totalsz << "]";
+        O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
+          << "]";
       } else
-        assert(false &&
-               "Unknown return type");
+        assert(false && "Unknown return type");
     }
   } else {
     SmallVector<EVT, 16> vtparts;
     ComputeValueVTs(*TLI, Ty, vtparts);
     unsigned idx = 0;
-    for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
+    for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
       unsigned elems = 1;
       EVT elemtype = vtparts[i];
       if (vtparts[i].isVector()) {
@@ -383,14 +378,16 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F,
         elemtype = vtparts[i].getVectorElementType();
       }
 
-      for (unsigned j=0, je=elems; j!=je; ++j) {
+      for (unsigned j = 0, je = elems; j != je; ++j) {
         unsigned sz = elemtype.getSizeInBits();
-        if (elemtype.isInteger() && (sz < 32)) sz = 32;
+        if (elemtype.isInteger() && (sz < 32))
+          sz = 32;
         O << ".reg .b" << sz << " func_retval" << idx;
-        if (j<je-1) O << ", ";
+        if (j < je - 1)
+          O << ", ";
         ++idx;
       }
-      if (i < e-1)
+      if (i < e - 1)
         O << ", ";
     }
   }
@@ -411,7 +408,7 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
   // Set up
   MRI = &MF->getRegInfo();
   F = MF->getFunction();
-  emitLinkageDirective(F,O);
+  emitLinkageDirective(F, O);
   if (llvm::isKernelFunction(*F))
     O << ".entry ";
   else {
@@ -434,7 +431,7 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
 void NVPTXAsmPrinter::EmitFunctionBodyStart() {
   const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
   unsigned numRegClasses = TRI.getNumRegClasses();
-  VRidGlobal2LocalMap = new std::map<unsigned, unsigned>[numRegClasses+1];
+  VRidGlobal2LocalMap = new std::map<unsigned, unsigned>[numRegClasses + 1];
   OutStreamer.EmitRawText(StringRef("{\n"));
   setAndEmitFunctionVirtualRegisters(*MF);
 
@@ -446,54 +443,63 @@ void NVPTXAsmPrinter::EmitFunctionBodyStart() {
 
 void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
   OutStreamer.EmitRawText(StringRef("}\n"));
-  delete []VRidGlobal2LocalMap;
+  delete[] VRidGlobal2LocalMap;
 }
 
-
-void
-NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function& F,
-                                              raw_ostream &O) const {
+void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
+                                                   raw_ostream &O) const {
   // If the NVVM IR has some of reqntid* specified, then output
   // the reqntid directive, and set the unspecified ones to 1.
   // If none of reqntid* is specified, don't output reqntid directive.
   unsigned reqntidx, reqntidy, reqntidz;
   bool specified = false;
-  if (llvm::getReqNTIDx(F, reqntidx) == false) reqntidx = 1;
-  else specified = true;
-  if (llvm::getReqNTIDy(F, reqntidy) == false) reqntidy = 1;
-  else specified = true;
-  if (llvm::getReqNTIDz(F, reqntidz) == false) reqntidz = 1;
-  else specified = true;
+  if (llvm::getReqNTIDx(F, reqntidx) == false)
+    reqntidx = 1;
+  else
+    specified = true;
+  if (llvm::getReqNTIDy(F, reqntidy) == false)
+    reqntidy = 1;
+  else
+    specified = true;
+  if (llvm::getReqNTIDz(F, reqntidz) == false)
+    reqntidz = 1;
+  else
+    specified = true;
 
   if (specified)
-    O << ".reqntid " << reqntidx << ", "
-    << reqntidy << ", " << reqntidz << "\n";
+    O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz
+      << "\n";
 
   // If the NVVM IR has some of maxntid* specified, then output
   // the maxntid directive, and set the unspecified ones to 1.
   // If none of maxntid* is specified, don't output maxntid directive.
   unsigned maxntidx, maxntidy, maxntidz;
   specified = false;
-  if (llvm::getMaxNTIDx(F, maxntidx) == false) maxntidx = 1;
-  else specified = true;
-  if (llvm::getMaxNTIDy(F, maxntidy) == false) maxntidy = 1;
-  else specified = true;
-  if (llvm::getMaxNTIDz(F, maxntidz) == false) maxntidz = 1;
-  else specified = true;
+  if (llvm::getMaxNTIDx(F, maxntidx) == false)
+    maxntidx = 1;
+  else
+    specified = true;
+  if (llvm::getMaxNTIDy(F, maxntidy) == false)
+    maxntidy = 1;
+  else
+    specified = true;
+  if (llvm::getMaxNTIDz(F, maxntidz) == false)
+    maxntidz = 1;
+  else
+    specified = true;
 
   if (specified)
-    O << ".maxntid " << maxntidx << ", "
-    << maxntidy << ", " << maxntidz << "\n";
+    O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz
+      << "\n";
 
   unsigned mincta;
   if (llvm::getMinCTASm(F, mincta))
     O << ".minnctapersm " << mincta << "\n";
 }
 
-void
-NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec,
-                                        raw_ostream &O) {
-  const TargetRegisterClass * RC = MRI->getRegClass(vr);
+void NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec,
+                                             raw_ostream &O) {
+  const TargetRegisterClass *RC = MRI->getRegClass(vr);
   unsigned id = RC->getID();
 
   std::map<unsigned, unsigned> &regmap = VRidGlobal2LocalMap[id];
@@ -506,44 +512,38 @@ NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec,
   report_fatal_error("Bad register!");
 }
 
-void
-NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, bool isVec,
-                                     raw_ostream &O) {
+void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, bool isVec,
+                                          raw_ostream &O) {
   getVirtualRegisterName(vr, isVec, O);
 }
 
-void NVPTXAsmPrinter::printVecModifiedImmediate(const MachineOperand &MO,
-                                                const char *Modifier,
-                                                raw_ostream &O) {
-  static const char vecelem[] = {'0', '1', '2', '3', '0', '1', '2', '3'};
-  int Imm = (int)MO.getImm();
-  if(0 == strcmp(Modifier, "vecelem"))
+void NVPTXAsmPrinter::printVecModifiedImmediate(
+    const MachineOperand &MO, const char *Modifier, raw_ostream &O) {
+  static const char vecelem[] = { '0', '1', '2', '3', '0', '1', '2', '3' };
+  int Imm = (int) MO.getImm();
+  if (0 == strcmp(Modifier, "vecelem"))
     O << "_" << vecelem[Imm];
-  else if(0 == strcmp(Modifier, "vecv4comm1")) {
-    if((Imm < 0) || (Imm > 3))
+  else if (0 == strcmp(Modifier, "vecv4comm1")) {
+    if ((Imm < 0) || (Imm > 3))
       O << "//";
-  }
-  else if(0 == strcmp(Modifier, "vecv4comm2")) {
-    if((Imm < 4) || (Imm > 7))
+  } else if (0 == strcmp(Modifier, "vecv4comm2")) {
+    if ((Imm < 4) || (Imm > 7))
       O << "//";
-  }
-  else if(0 == strcmp(Modifier, "vecv4pos")) {
-    if(Imm < 0) Imm = 0;
-    O << "_" << vecelem[Imm%4];
-  }
-  else if(0 == strcmp(Modifier, "vecv2comm1")) {
-    if((Imm < 0) || (Imm > 1))
+  } else if (0 == strcmp(Modifier, "vecv4pos")) {
+    if (Imm < 0)
+      Imm = 0;
+    O << "_" << vecelem[Imm % 4];
+  } else if (0 == strcmp(Modifier, "vecv2comm1")) {
+    if ((Imm < 0) || (Imm > 1))
       O << "//";
-  }
-  else if(0 == strcmp(Modifier, "vecv2comm2")) {
-    if((Imm < 2) || (Imm > 3))
+  } else if (0 == strcmp(Modifier, "vecv2comm2")) {
+    if ((Imm < 2) || (Imm > 3))
       O << "//";
-  }
-  else if(0 == strcmp(Modifier, "vecv2pos")) {
-    if(Imm < 0) Imm = 0;
-    O << "_" << vecelem[Imm%2];
-  }
-  else
+  } else if (0 == strcmp(Modifier, "vecv2pos")) {
+    if (Imm < 0)
+      Imm = 0;
+    O << "_" << vecelem[Imm % 2];
+  } else
     llvm_unreachable("Unknown Modifier on immediate operand");
 }
 
@@ -565,7 +565,7 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
           emitVirtualRegister(MO.getReg(), true, O);
         else
           llvm_unreachable(
-                 "Don't know how to handle the modifier on virtual register.");
+              "Don't know how to handle the modifier on virtual register.");
       }
     }
     return;
@@ -576,7 +576,8 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
     else if (strstr(Modifier, "vec") == Modifier)
       printVecModifiedImmediate(MO, Modifier, O);
     else
-      llvm_unreachable("Don't know how to handle modifier on immediate operand");
+      llvm_unreachable(
+          "Don't know how to handle modifier on immediate operand");
     return;
 
   case MachineOperand::MO_FPImmediate:
@@ -588,18 +589,16 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
     break;
 
   case MachineOperand::MO_ExternalSymbol: {
-    const char * symbname = MO.getSymbolName();
+    const char *symbname = MO.getSymbolName();
     if (strstr(symbname, ".PARAM") == symbname) {
       unsigned index;
-      sscanf(symbname+6, "%u[];", &index);
+      sscanf(symbname + 6, "%u[];", &index);
       printParamName(index, O);
-    }
-    else if (strstr(symbname, ".HLPPARAM") == symbname) {
+    } else if (strstr(symbname, ".HLPPARAM") == symbname) {
       unsigned index;
-      sscanf(symbname+9, "%u[];", &index);
+      sscanf(symbname + 9, "%u[];", &index);
       O << *CurrentFnSym << "_param_" << index << "_offset";
-    }
-    else
+    } else
       O << symbname;
     break;
   }
@@ -613,8 +612,8 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
   }
 }
 
-void NVPTXAsmPrinter::
-printImplicitDef(const MachineInstr *MI, raw_ostream &O) const {
+void NVPTXAsmPrinter::printImplicitDef(const MachineInstr *MI,
+                                       raw_ostream &O) const {
 #ifndef __OPTIMIZE__
   O << "\t// Implicit def :";
   //printOperand(MI, 0);
@@ -628,32 +627,41 @@ void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
 
   if (Modifier && !strcmp(Modifier, "add")) {
     O << ", ";
-    printOperand(MI, opNum+1, O);
+    printOperand(MI, opNum + 1, O);
   } else {
-    if (MI->getOperand(opNum+1).isImm() &&
-        MI->getOperand(opNum+1).getImm() == 0)
+    if (MI->getOperand(opNum + 1).isImm() &&
+        MI->getOperand(opNum + 1).getImm() == 0)
       return; // don't print ',0' or '+0'
     O << "+";
-    printOperand(MI, opNum+1, O);
+    printOperand(MI, opNum + 1, O);
   }
 }
 
 void NVPTXAsmPrinter::printLdStCode(const MachineInstr *MI, int opNum,
-                                    raw_ostream &O, const char *Modifier)
-{
+                                    raw_ostream &O, const char *Modifier) {
   if (Modifier) {
     const MachineOperand &MO = MI->getOperand(opNum);
-    int Imm = (int)MO.getImm();
+    int Imm = (int) MO.getImm();
     if (!strcmp(Modifier, "volatile")) {
       if (Imm)
         O << ".volatile";
     } else if (!strcmp(Modifier, "addsp")) {
       switch (Imm) {
-      case NVPTX::PTXLdStInstCode::GLOBAL: O << ".global"; break;
-      case NVPTX::PTXLdStInstCode::SHARED: O << ".shared"; break;
-      case NVPTX::PTXLdStInstCode::LOCAL: O << ".local"; break;
-      case NVPTX::PTXLdStInstCode::PARAM: O << ".param"; break;
-      case NVPTX::PTXLdStInstCode::CONSTANT: O << ".const"; break;
+      case NVPTX::PTXLdStInstCode::GLOBAL:
+        O << ".global";
+        break;
+      case NVPTX::PTXLdStInstCode::SHARED:
+        O << ".shared";
+        break;
+      case NVPTX::PTXLdStInstCode::LOCAL:
+        O << ".local";
+        break;
+      case NVPTX::PTXLdStInstCode::PARAM:
+        O << ".param";
+        break;
+      case NVPTX::PTXLdStInstCode::CONSTANT:
+        O << ".const";
+        break;
       case NVPTX::PTXLdStInstCode::GENERIC:
         if (!nvptxSubtarget.hasGenericLdSt())
           O << ".global";
@@ -661,31 +669,27 @@ void NVPTXAsmPrinter::printLdStCode(const MachineInstr *MI, int opNum,
       default:
         llvm_unreachable("Wrong Address Space");
       }
-    }
-    else if (!strcmp(Modifier, "sign")) {
-      if (Imm==NVPTX::PTXLdStInstCode::Signed)
+    } else if (!strcmp(Modifier, "sign")) {
+      if (Imm == NVPTX::PTXLdStInstCode::Signed)
         O << "s";
-      else if (Imm==NVPTX::PTXLdStInstCode::Unsigned)
+      else if (Imm == NVPTX::PTXLdStInstCode::Unsigned)
         O << "u";
       else
         O << "f";
-    }
-    else if (!strcmp(Modifier, "vec")) {
-      if (Imm==NVPTX::PTXLdStInstCode::V2)
+    } else if (!strcmp(Modifier, "vec")) {
+      if (Imm == NVPTX::PTXLdStInstCode::V2)
         O << ".v2";
-      else if (Imm==NVPTX::PTXLdStInstCode::V4)
+      else if (Imm == NVPTX::PTXLdStInstCode::V4)
         O << ".v4";
-    }
-    else
+    } else
       llvm_unreachable("Unknown Modifier");
-  }
-  else
+  } else
     llvm_unreachable("Empty Modifier");
 }
 
-void NVPTXAsmPrinter::emitDeclaration (const Function *F, raw_ostream &O) {
+void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
 
-  emitLinkageDirective(F,O);
+  emitLinkageDirective(F, O);
   if (llvm::isKernelFunction(*F))
     O << ".entry ";
   else
@@ -696,8 +700,7 @@ void NVPTXAsmPrinter::emitDeclaration (const Function *F, raw_ostream &O) {
   O << ";\n";
 }
 
-static bool usedInGlobalVarDef(const Constant *C)
-{
+static bool usedInGlobalVarDef(const Constant *C) {
   if (!C)
     return false;
 
@@ -707,8 +710,8 @@ static bool usedInGlobalVarDef(const Constant *C)
     return true;
   }
 
-  for (Value::const_use_iterator ui=C->use_begin(), ue=C->use_end();
-      ui!=ue; ++ui) {
+  for (Value::const_use_iterator ui = C->use_begin(), ue = C->use_end();
+       ui != ue; ++ui) {
     const Constant *C = dyn_cast<Constant>(*ui);
     if (usedInGlobalVarDef(C))
       return true;
@@ -716,8 +719,7 @@ static bool usedInGlobalVarDef(const Constant *C)
   return false;
 }
 
-static bool usedInOneFunc(const User *U, Function const *&oneFunc)
-{
+static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
   if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
     if (othergv->getName().str() == "llvm.used")
       return true;
@@ -730,19 +732,17 @@ static bool usedInOneFunc(const User *U, Function const *&oneFunc)
         return false;
       oneFunc = curFunc;
       return true;
-    }
-    else
+    } else
       return false;
   }
 
   if (const MDNode *md = dyn_cast<MDNode>(U))
     if (md->hasName() && ((md->getName().str() == "llvm.dbg.gv") ||
-        (md->getName().str() == "llvm.dbg.sp")))
+                          (md->getName().str() == "llvm.dbg.sp")))
       return true;
 
-
-  for (User::const_use_iterator ui=U->use_begin(), ue=U->use_end();
-      ui!=ue; ++ui) {
+  for (User::const_use_iterator ui = U->use_begin(), ue = U->use_end();
+       ui != ue; ++ui) {
     if (usedInOneFunc(*ui, oneFunc) == false)
       return false;
   }
@@ -776,16 +776,18 @@ static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
 
 static bool useFuncSeen(const Constant *C,
                         llvm::DenseMap<const Function *, bool> &seenMap) {
-  for (Value::const_use_iterator ui=C->use_begin(), ue=C->use_end();
-      ui!=ue; ++ui) {
+  for (Value::const_use_iterator ui = C->use_begin(), ue = C->use_end();
+       ui != ue; ++ui) {
     if (const Constant *cu = dyn_cast<Constant>(*ui)) {
       if (useFuncSeen(cu, seenMap))
         return true;
     } else if (const Instruction *I = dyn_cast<Instruction>(*ui)) {
       const BasicBlock *bb = I->getParent();
-      if (!bb) continue;
+      if (!bb)
+        continue;
       const Function *caller = bb->getParent();
-      if (!caller) continue;
+      if (!caller)
+        continue;
       if (seenMap.find(caller) != seenMap.end())
         return true;
     }
@@ -793,10 +795,9 @@ static bool useFuncSeen(const Constant *C,
   return false;
 }
 
-void NVPTXAsmPrinter::emitDeclarations (Module &M, raw_ostream &O) {
+void NVPTXAsmPrinter::emitDeclarations(Module &M, raw_ostream &O) {
   llvm::DenseMap<const Function *, bool> seenMap;
-  for (Module::const_iterator FI=M.begin(), FE=M.end();
-      FI!=FE; ++FI) {
+  for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
     const Function *F = FI;
 
     if (F->isDeclaration()) {
@@ -808,8 +809,9 @@ void NVPTXAsmPrinter::emitDeclarations (Module &M, raw_ostream &O) {
       emitDeclaration(F, O);
       continue;
     }
-    for (Value::const_use_iterator iter=F->use_begin(),
-        iterEnd=F->use_end(); iter!=iterEnd; ++iter) {
+    for (Value::const_use_iterator iter = F->use_begin(),
+                                   iterEnd = F->use_end();
+         iter != iterEnd; ++iter) {
       if (const Constant *C = dyn_cast<Constant>(*iter)) {
         if (usedInGlobalVarDef(C)) {
           // The use is in the initialization of a global variable
@@ -828,12 +830,15 @@ void NVPTXAsmPrinter::emitDeclarations (Module &M, raw_ostream &O) {
         }
       }
 
-      if (!isa<Instruction>(*iter)) continue;
+      if (!isa<Instruction>(*iter))
+        continue;
       const Instruction *instr = cast<Instruction>(*iter);
       const BasicBlock *bb = instr->getParent();
-      if (!bb) continue;
+      if (!bb)
+        continue;
       const Function *caller = bb->getParent();
-      if (!caller) continue;
+      if (!caller)
+        continue;
 
       // If a caller has already been seen, then the caller is
       // appearing in the module before the callee. so print out
@@ -852,9 +857,10 @@ void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) {
   DebugInfoFinder DbgFinder;
   DbgFinder.processModule(M);
 
-  unsigned i=1;
+  unsigned i = 1;
   for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
-      E = DbgFinder.compile_unit_end(); I != E; ++I) {
+                                 E = DbgFinder.compile_unit_end();
+       I != E; ++I) {
     DICompileUnit DIUnit(*I);
     StringRef Filename(DIUnit.getFilename());
     StringRef Dirname(DIUnit.getDirectory());
@@ -871,7 +877,8 @@ void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) {
   }
 
   for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(),
-      E = DbgFinder.subprogram_end(); I != E; ++I) {
+                                 E = DbgFinder.subprogram_end();
+       I != E; ++I) {
     DISubprogram SP(*I);
     StringRef Filename(SP.getFilename());
     StringRef Dirname(SP.getDirectory());
@@ -887,7 +894,7 @@ void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) {
   }
 }
 
-bool NVPTXAsmPrinter::doInitialization (Module &M) {
+bool NVPTXAsmPrinter::doInitialization(Module &M) {
 
   SmallString<128> Str1;
   raw_svector_ostream OS1(Str1);
@@ -899,8 +906,8 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) {
   //bool Result = AsmPrinter::doInitialization(M);
 
   // Initialize TargetLoweringObjectFile.
-  const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
-          .Initialize(OutContext, TM);
+  const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
+      .Initialize(OutContext, TM);
 
   Mang = new Mangler(OutContext, *TM.getDataLayout());
 
@@ -908,11 +915,9 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) {
   emitHeader(M, OS1);
   OutStreamer.EmitRawText(OS1.str());
 
-
   // Already commented out
   //bool Result = AsmPrinter::doInitialization(M);
 
-
   if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)
     recordAndEmitFilenames(M);
 
@@ -926,16 +931,16 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) {
   // global variable in order, and ensure that we emit it *after* its dependent
   // globals. We use a little extra memory maintaining both a set and a list to
   // have fast searches while maintaining a strict ordering.
-  SmallVector<GlobalVariable*,8> Globals;
-  DenseSet<GlobalVariable*> GVVisited;
-  DenseSet<GlobalVariable*> GVVisiting;
+  SmallVector<GlobalVariable *, 8> Globals;
+  DenseSet<GlobalVariable *> GVVisited;
+  DenseSet<GlobalVariable *> GVVisiting;
 
   // Visit each global variable, in order
-  for (Module::global_iterator I = M.global_begin(), E = M.global_end();
-       I != E; ++I)
+  for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E;
+       ++I)
     VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting);
 
-  assert(GVVisited.size() == M.getGlobalList().size() && 
+  assert(GVVisited.size() == M.getGlobalList().size() &&
          "Missed a global variable");
   assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
 
@@ -946,10 +951,10 @@ bool NVPTXAsmPrinter::doInitialization (Module &M) {
   OS2 << '\n';
 
   OutStreamer.EmitRawText(OS2.str());
-  return false;  // success
+  return false; // success
 }
 
-void NVPTXAsmPrinter::emitHeader (Module &M, raw_ostream &O) {
+void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) {
   O << "//\n";
   O << "// Generated by LLVM NVPTX Back-End\n";
   O << "//\n";
@@ -989,12 +994,12 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) {
 
   Module::GlobalListType &global_list = M.getGlobalList();
   int i, n = global_list.size();
-  GlobalVariable **gv_array = new GlobalVariable* [n];
+  GlobalVariable **gv_array = new GlobalVariable *[n];
 
   // first, back-up GlobalVariable in gv_array
   i = 0;
   for (Module::global_iterator I = global_list.begin(), E = global_list.end();
-      I != E; ++I)
+       I != E; ++I)
     gv_array[i++] = &*I;
 
   // second, empty global_list
@@ -1005,13 +1010,12 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) {
   bool ret = AsmPrinter::doFinalization(M);
 
   // now we restore global variables
-  for (i = 0; i < n; i ++)
+  for (i = 0; i < n; i++)
     global_list.insert(global_list.end(), gv_array[i]);
 
   delete[] gv_array;
   return ret;
 
-
   //bool Result = AsmPrinter::doFinalization(M);
   // Instead of calling the parents doFinalization, we may
   // clone parents doFinalization and customize here.
@@ -1031,8 +1035,8 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) {
 // external without init                  -> .extern
 // appending                              -> not allowed, assert.
 
-void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue* V, raw_ostream &O)
-{
+void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
+                                           raw_ostream &O) {
   if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) {
     if (V->hasExternalLinkage()) {
       if (isa<GlobalVariable>(V)) {
@@ -1059,8 +1063,7 @@ void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue* V, raw_ostream &O)
   }
 }
 
-
-void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O,
+void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O,
                                          bool processDemoted) {
 
   // Skip meta data
@@ -1111,30 +1114,48 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O,
     if (Initializer)
       CI = dyn_cast<ConstantInt>(Initializer);
     if (CI) {
-      unsigned sample=CI->getZExtValue();
+      unsigned sample = CI->getZExtValue();
 
       O << " = { ";
 
-      for (int i =0, addr=((sample & __CLK_ADDRESS_MASK ) >>
-          __CLK_ADDRESS_BASE) ; i < 3 ; i++) {
+      for (int i = 0,
+               addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
+           i < 3; i++) {
         O << "addr_mode_" << i << " = ";
         switch (addr) {
-        case 0: O << "wrap"; break;
-        case 1: O << "clamp_to_border"; break;
-        case 2: O << "clamp_to_edge"; break;
-        case 3: O << "wrap"; break;
-        case 4: O << "mirror"; break;
+        case 0:
+          O << "wrap";
+          break;
+        case 1:
+          O << "clamp_to_border";
+          break;
+        case 2:
+          O << "clamp_to_edge";
+          break;
+        case 3:
+          O << "wrap";
+          break;
+        case 4:
+          O << "mirror";
+          break;
         }
-        O <<", ";
+        O << ", ";
       }
       O << "filter_mode = ";
-      switch (( sample & __CLK_FILTER_MASK ) >> __CLK_FILTER_BASE ) {
-      case 0: O << "nearest"; break;
-      case 1: O << "linear";  break;
-      case 2: assert ( 0 && "Anisotropic filtering is not supported");
-      default: O << "nearest"; break;
+      switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
+      case 0:
+        O << "nearest";
+        break;
+      case 1:
+        O << "linear";
+        break;
+      case 2:
+        assert(0 && "Anisotropic filtering is not supported");
+      default:
+        O << "nearest";
+        break;
       }
-      if (!(( sample &__CLK_NORMALIZED_MASK ) >> __CLK_NORMALIZED_BASE)) {
+      if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
         O << ", force_unnormalized_coords = 1";
       }
       O << " }";
@@ -1176,7 +1197,6 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O,
   else
     O << " .align " << GVar->getAlignment();
 
-
   if (ETy->isPrimitiveType() || ETy->isIntegerTy() || isa<PointerType>(ETy)) {
     O << " .";
     O << getPTXFundamentalTypeStr(ETy, false);
@@ -1186,17 +1206,17 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O,
     // Ptx allows variable initilization only for constant and global state
     // spaces.
     if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
-        (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
-        (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST))
-        && GVar->hasInitializer()) {
+         (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
+         (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) &&
+        GVar->hasInitializer()) {
       Constant *Initializer = GVar->getInitializer();
       if (!Initializer->isNullValue()) {
-        O << " = " ;
+        O << " = ";
         printScalarConstant(Initializer, O);
       }
     }
   } else {
-    unsigned int ElementSize =0;
+    unsigned int ElementSize = 0;
 
     // Although PTX has direct support for struct type and array type and
     // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
@@ -1210,54 +1230,49 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O,
       // Ptx allows variable initilization only for constant and
       // global state spaces.
       if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) ||
-          (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
-          (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST))
-          && GVar->hasInitializer()) {
+           (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) ||
+           (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) &&
+          GVar->hasInitializer()) {
         Constant *Initializer = GVar->getInitializer();
-        if (!isa<UndefValue>(Initializer) &&
-            !Initializer->isNullValue()) {
+        if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
           AggBuffer aggBuffer(ElementSize, O, *this);
           bufferAggregateConstant(Initializer, &aggBuffer);
           if (aggBuffer.numSymbols) {
             if (nvptxSubtarget.is64Bit()) {
-              O << " .u64 " << *Mang->getSymbol(GVar) <<"[" ;
-              O << ElementSize/8;
-            }
-            else {
-              O << " .u32 " << *Mang->getSymbol(GVar) <<"[" ;
-              O << ElementSize/4;
+              O << " .u64 " << *Mang->getSymbol(GVar) << "[";
+              O << ElementSize / 8;
+            } else {
+              O << " .u32 " << *Mang->getSymbol(GVar) << "[";
+              O << ElementSize / 4;
             }
             O << "]";
-          }
-          else {
-            O << " .b8 " << *Mang->getSymbol(GVar) <<"[" ;
+          } else {
+            O << " .b8 " << *Mang->getSymbol(GVar) << "[";
             O << ElementSize;
             O << "]";
           }
-          O << " = {" ;
+          O << " = {";
           aggBuffer.print();
           O << "}";
-        }
-        else {
-          O << " .b8 " << *Mang->getSymbol(GVar) ;
+        } else {
+          O << " .b8 " << *Mang->getSymbol(GVar);
           if (ElementSize) {
-            O <<"[" ;
+            O << "[";
             O << ElementSize;
             O << "]";
           }
         }
-      }
-      else {
+      } else {
         O << " .b8 " << *Mang->getSymbol(GVar);
         if (ElementSize) {
-          O <<"[" ;
+          O << "[";
           O << ElementSize;
           O << "]";
         }
       }
       break;
     default:
-      assert( 0 && "type not supported yet");
+      assert(0 && "type not supported yet");
     }
 
   }
@@ -1270,7 +1285,7 @@ void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
 
   std::vector<GlobalVariable *> &gvars = localDecls[f];
 
-  for (unsigned i=0, e=gvars.size(); i!=e; ++i) {
+  for (unsigned i = 0, e = gvars.size(); i != e; ++i) {
     O << "\t// demoted variable\n\t";
     printModuleLevelGV(gvars[i], O, true);
   }
@@ -1280,24 +1295,24 @@ void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
                                           raw_ostream &O) const {
   switch (AddressSpace) {
   case llvm::ADDRESS_SPACE_LOCAL:
-    O << "local" ;
+    O << "local";
     break;
   case llvm::ADDRESS_SPACE_GLOBAL:
-    O << "global" ;
+    O << "global";
     break;
   case llvm::ADDRESS_SPACE_CONST:
     // This logic should be consistent with that in
     // getCodeAddrSpace() (NVPTXISelDATToDAT.cpp)
     if (nvptxSubtarget.hasGenericLdSt())
-      O << "global" ;
+      O << "global";
     else
-      O << "const" ;
+      O << "const";
     break;
   case llvm::ADDRESS_SPACE_CONST_NOT_GEN:
-    O << "const" ;
+    O << "const";
     break;
   case llvm::ADDRESS_SPACE_SHARED:
-    O << "shared" ;
+    O << "shared";
     break;
   default:
     report_fatal_error("Bad address space found while emitting PTX");
@@ -1305,8 +1320,8 @@ void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
   }
 }
 
-std::string NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty,
-                                                      bool useB4PTR) const {
+std::string
+NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const {
   switch (Ty->getTypeID()) {
   default:
     llvm_unreachable("unexpected type");
@@ -1330,17 +1345,20 @@ std::string NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty,
     return "f64";
   case Type::PointerTyID:
     if (nvptxSubtarget.is64Bit())
-      if (useB4PTR) return "b64";
-      else return "u64";
+      if (useB4PTR)
+        return "b64";
+      else
+        return "u64";
+    else if (useB4PTR)
+      return "b32";
     else
-      if (useB4PTR) return "b32";
-      else return "u32";
+      return "u32";
   }
   llvm_unreachable("unexpected type");
   return NULL;
 }
 
-void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable* GVar,
+void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
                                             raw_ostream &O) {
 
   const DataLayout *TD = TM.getDataLayout();
@@ -1364,7 +1382,7 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable* GVar,
     return;
   }
 
-  int64_t ElementSize =0;
+  int64_t ElementSize = 0;
 
   // Although PTX has direct support for struct type and array type and LLVM IR
   // is very similar to PTX, the LLVM CodeGen does not support for targets that
@@ -1375,22 +1393,19 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable* GVar,
   case Type::ArrayTyID:
   case Type::VectorTyID:
     ElementSize = TD->getTypeStoreSize(ETy);
-    O << " .b8 " << *Mang->getSymbol(GVar) <<"[" ;
+    O << " .b8 " << *Mang->getSymbol(GVar) << "[";
     if (ElementSize) {
-      O << itostr(ElementSize) ;
+      O << itostr(ElementSize);
     }
     O << "]";
     break;
   default:
-    assert( 0 && "type not supported yet");
+    assert(0 && "type not supported yet");
   }
-  return ;
+  return;
 }
 
-
-static unsigned int
-getOpenCLAlignment(const DataLayout *TD,
-                   Type *Ty) {
+static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) {
   if (Ty->isPrimitiveType() || Ty->isIntegerTy() || isa<PointerType>(Ty))
     return TD->getPrefTypeAlignment(Ty);
 
@@ -1404,9 +1419,9 @@ getOpenCLAlignment(const DataLayout *TD,
     unsigned int numE = VTy->getNumElements();
     unsigned int alignE = TD->getPrefTypeAlignment(ETy);
     if (numE == 3)
-      return 4*alignE;
+      return 4 * alignE;
     else
-      return numE*alignE;
+      return numE * alignE;
   }
 
   const StructType *STy = dyn_cast<StructType>(Ty);
@@ -1414,7 +1429,7 @@ getOpenCLAlignment(const DataLayout *TD,
     unsigned int alignStruct = 1;
     // Go through each element of the struct and find the
     // largest alignment.
-    for (unsigned i=0, e=STy->getNumElements(); i != e; i++) {
+    for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) {
       Type *ETy = STy->getElementType(i);
       unsigned int align = getOpenCLAlignment(TD, ETy);
       if (align > alignStruct)
@@ -1458,7 +1473,7 @@ void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) {
   }
 
   for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, i++) {
-    if (i==paramIndex) {
+    if (i == paramIndex) {
       printParamName(I, paramIndex, O);
       return;
     }
@@ -1466,8 +1481,7 @@ void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) {
   llvm_unreachable("paramIndex out of bound");
 }
 
-void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
-                                            raw_ostream &O) {
+void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
   const DataLayout *TD = TM.getDataLayout();
   const AttributeSet &PAL = F->getAttributes();
   const TargetLowering *TLI = TM.getTargetLowering();
@@ -1481,7 +1495,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
   O << "(\n";
 
   for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
-    const Type *Ty = I->getType();
+    Type *Ty = I->getType();
 
     if (!first)
       O << ",\n";
@@ -1496,14 +1510,28 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
           O << "\t.param .surfref " << *CurrentFnSym << "_param_" << paramIndex;
         else // Default image is read_only
           O << "\t.param .texref " << *CurrentFnSym << "_param_" << paramIndex;
-      }
-      else // Should be llvm::isSampler(*I)
+      } else // Should be llvm::isSampler(*I)
         O << "\t.param .samplerref " << *CurrentFnSym << "_param_"
-        << paramIndex;
+          << paramIndex;
       continue;
     }
 
-    if (PAL.hasAttribute(paramIndex+1, Attribute::ByVal) == false) {
+    if (PAL.hasAttribute(paramIndex + 1, Attribute::ByVal) == false) {
+      if (Ty->isVectorTy()) {
+        // Just print .param .b8 .align <a> .param[size];
+        // <a> = PAL.getparamalignment
+        // size = typeallocsize of element type
+        unsigned align = PAL.getParamAlignment(paramIndex + 1);
+        if (align == 0)
+          align = TD->getABITypeAlignment(Ty);
+
+        unsigned sz = TD->getTypeAllocSize(Ty);
+        O << "\t.param .align " << align << " .b8 ";
+        printParamName(I, paramIndex, O);
+        O << "[" << sz << "]";
+
+        continue;
+      }
       // Just a scalar
       const PointerType *PTy = dyn_cast<PointerType>(Ty);
       if (isKernelFunc) {
@@ -1514,7 +1542,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
           if (nvptxSubtarget.getDrvInterface() != NVPTX::CUDA) {
             Type *ETy = PTy->getElementType();
             int addrSpace = PTy->getAddressSpace();
-            switch(addrSpace) {
+            switch (addrSpace) {
             default:
               O << ".ptr ";
               break;
@@ -1529,15 +1557,14 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
               O << ".ptr .global ";
               break;
             }
-            O << ".align " << (int)getOpenCLAlignment(TD, ETy) << " ";
+            O << ".align " << (int) getOpenCLAlignment(TD, ETy) << " ";
           }
           printParamName(I, paramIndex, O);
           continue;
         }
 
         // non-pointer scalar to kernel func
-        O << "\t.param ."
-            << getPTXFundamentalTypeStr(Ty) << " ";
+        O << "\t.param ." << getPTXFundamentalTypeStr(Ty) << " ";
         printParamName(I, paramIndex, O);
         continue;
       }
@@ -1546,9 +1573,9 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
       unsigned sz = 0;
       if (isa<IntegerType>(Ty)) {
         sz = cast<IntegerType>(Ty)->getBitWidth();
-        if (sz < 32) sz = 32;
-      }
-      else if (isa<PointerType>(Ty))
+        if (sz < 32)
+          sz = 32;
+      } else if (isa<PointerType>(Ty))
         sz = thePointerTy.getSizeInBits();
       else
         sz = Ty->getPrimitiveSizeInBits();
@@ -1562,21 +1589,19 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
 
     // param has byVal attribute. So should be a pointer
     const PointerType *PTy = dyn_cast<PointerType>(Ty);
-    assert(PTy &&
-           "Param with byval attribute should be a pointer type");
+    assert(PTy && "Param with byval attribute should be a pointer type");
     Type *ETy = PTy->getElementType();
 
     if (isABI || isKernelFunc) {
       // Just print .param .b8 .align <a> .param[size];
       // <a> = PAL.getparamalignment
       // size = typeallocsize of element type
-      unsigned align = PAL.getParamAlignment(paramIndex+1);
+      unsigned align = PAL.getParamAlignment(paramIndex + 1);
       if (align == 0)
         align = TD->getABITypeAlignment(ETy);
 
       unsigned sz = TD->getTypeAllocSize(ETy);
-      O << "\t.param .align " << align
-          << " .b8 ";
+      O << "\t.param .align " << align << " .b8 ";
       printParamName(I, paramIndex, O);
       O << "[" << sz << "]";
       continue;
@@ -1587,7 +1612,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
       // each vector element.
       SmallVector<EVT, 16> vtparts;
       ComputeValueVTs(*TLI, ETy, vtparts);
-      for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
+      for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
         unsigned elems = 1;
         EVT elemtype = vtparts[i];
         if (vtparts[i].isVector()) {
@@ -1595,15 +1620,17 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
           elemtype = vtparts[i].getVectorElementType();
         }
 
-        for (unsigned j=0,je=elems; j!=je; ++j) {
+        for (unsigned j = 0, je = elems; j != je; ++j) {
           unsigned sz = elemtype.getSizeInBits();
-          if (elemtype.isInteger() && (sz < 32)) sz = 32;
+          if (elemtype.isInteger() && (sz < 32))
+            sz = 32;
           O << "\t.reg .b" << sz << " ";
           printParamName(I, paramIndex, O);
-          if (j<je-1) O << ",\n";
+          if (j < je - 1)
+            O << ",\n";
           ++paramIndex;
         }
-        if (i<e-1)
+        if (i < e - 1)
           O << ",\n";
       }
       --paramIndex;
@@ -1620,9 +1647,8 @@ void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF,
   emitFunctionParamList(F, O);
 }
 
-
-void NVPTXAsmPrinter::
-setAndEmitFunctionVirtualRegisters(const MachineFunction &MF) {
+void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
+    const MachineFunction &MF) {
   SmallString<128> Str;
   raw_svector_ostream O(Str);
 
@@ -1635,14 +1661,12 @@ setAndEmitFunctionVirtualRegisters(const MachineFunction &MF) {
   const MachineFrameInfo *MFI = MF.getFrameInfo();
   int NumBytes = (int) MFI->getStackSize();
   if (NumBytes) {
-    O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t"
-        << DEPOTNAME
-        << getFunctionNumber() << "[" << NumBytes << "];\n";
+    O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" << DEPOTNAME
+      << getFunctionNumber() << "[" << NumBytes << "];\n";
     if (nvptxSubtarget.is64Bit()) {
       O << "\t.reg .b64 \t%SP;\n";
       O << "\t.reg .b64 \t%SPL;\n";
-    }
-    else {
+    } else {
       O << "\t.reg .b32 \t%SP;\n";
       O << "\t.reg .b32 \t%SPL;\n";
     }
@@ -1653,12 +1677,12 @@ setAndEmitFunctionVirtualRegisters(const MachineFunction &MF) {
   // register number and the per class virtual register number.
   // We use the per class virtual register number in the ptx output.
   unsigned int numVRs = MRI->getNumVirtRegs();
-  for (unsigned i=0; i< numVRs; i++) {
+  for (unsigned i = 0; i < numVRs; i++) {
     unsigned int vr = TRI->index2VirtReg(i);
     const TargetRegisterClass *RC = MRI->getRegClass(vr);
     std::map<unsigned, unsigned> &regmap = VRidGlobal2LocalMap[RC->getID()];
     int n = regmap.size();
-    regmap.insert(std::make_pair(vr, n+1));
+    regmap.insert(std::make_pair(vr, n + 1));
   }
 
   // Emit register declarations
@@ -1702,23 +1726,20 @@ setAndEmitFunctionVirtualRegisters(const MachineFunction &MF) {
   OutStreamer.EmitRawText(O.str());
 }
 
-
 void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
-  APFloat APF = APFloat(Fp->getValueAPF());  // make a copy
+  APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
   bool ignored;
   unsigned int numHex;
   const char *lead;
 
-  if (Fp->getType()->getTypeID()==Type::FloatTyID) {
+  if (Fp->getType()->getTypeID() == Type::FloatTyID) {
     numHex = 8;
     lead = "0f";
-    APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven,
-                &ignored);
+    APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &ignored);
   } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
     numHex = 16;
     lead = "0d";
-    APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
-                &ignored);
+    APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored);
   } else
     llvm_unreachable("unsupported fp type");
 
@@ -1760,7 +1781,6 @@ void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) {
   llvm_unreachable("Not scalar type found in printScalarConstant()");
 }
 
-
 void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
                                    AggBuffer *aggBuffer) {
 
@@ -1768,7 +1788,7 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
 
   if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
     int s = TD->getTypeAllocSize(CPV->getType());
-    if (s<Bytes)
+    if (s < Bytes)
       s = Bytes;
     aggBuffer->addZeros(s);
     return;
@@ -1779,28 +1799,26 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
 
   case Type::IntegerTyID: {
     const Type *ETy = CPV->getType();
-    if ( ETy == Type::getInt8Ty(CPV->getContext()) ){
+    if (ETy == Type::getInt8Ty(CPV->getContext())) {
       unsigned char c =
           (unsigned char)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
       ptr = &c;
       aggBuffer->addBytes(ptr, 1, Bytes);
-    } else if ( ETy == Type::getInt16Ty(CPV->getContext()) ) {
-      short int16 =
-          (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
-      ptr = (unsigned char*)&int16;
+    } else if (ETy == Type::getInt16Ty(CPV->getContext())) {
+      short int16 = (short)(dyn_cast<ConstantInt>(CPV))->getZExtValue();
+      ptr = (unsigned char *)&int16;
       aggBuffer->addBytes(ptr, 2, Bytes);
-    } else if ( ETy == Type::getInt32Ty(CPV->getContext()) ) {
+    } else if (ETy == Type::getInt32Ty(CPV->getContext())) {
       if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
-        int int32 =(int)(constInt->getZExtValue());
-        ptr = (unsigned char*)&int32;
+        int int32 = (int)(constInt->getZExtValue());
+        ptr = (unsigned char *)&int32;
         aggBuffer->addBytes(ptr, 4, Bytes);
         break;
       } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
-        if (ConstantInt *constInt =
-            dyn_cast<ConstantInt>(ConstantFoldConstantExpression(
-                Cexpr, TD))) {
-          int int32 =(int)(constInt->getZExtValue());
-          ptr = (unsigned char*)&int32;
+        if (ConstantInt *constInt = dyn_cast<ConstantInt>(
+                ConstantFoldConstantExpression(Cexpr, TD))) {
+          int int32 = (int)(constInt->getZExtValue());
+          ptr = (unsigned char *)&int32;
           aggBuffer->addBytes(ptr, 4, Bytes);
           break;
         }
@@ -1812,17 +1830,17 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
         }
       }
       llvm_unreachable("unsupported integer const type");
-    } else if (ETy == Type::getInt64Ty(CPV->getContext()) ) {
+    } else if (ETy == Type::getInt64Ty(CPV->getContext())) {
       if (ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
-        long long int64 =(long long)(constInt->getZExtValue());
-        ptr = (unsigned char*)&int64;
+        long long int64 = (long long)(constInt->getZExtValue());
+        ptr = (unsigned char *)&int64;
         aggBuffer->addBytes(ptr, 8, Bytes);
         break;
       } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
         if (ConstantInt *constInt = dyn_cast<ConstantInt>(
-            ConstantFoldConstantExpression(Cexpr, TD))) {
-          long long int64 =(long long)(constInt->getZExtValue());
-          ptr = (unsigned char*)&int64;
+                ConstantFoldConstantExpression(Cexpr, TD))) {
+          long long int64 = (long long)(constInt->getZExtValue());
+          ptr = (unsigned char *)&int64;
           aggBuffer->addBytes(ptr, 8, Bytes);
           break;
         }
@@ -1841,17 +1859,16 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
   case Type::FloatTyID:
   case Type::DoubleTyID: {
     ConstantFP *CFP = dyn_cast<ConstantFP>(CPV);
-    const Type* Ty = CFP->getType();
+    const Type *Ty = CFP->getType();
     if (Ty == Type::getFloatTy(CPV->getContext())) {
-      float float32 = (float)CFP->getValueAPF().convertToFloat();
-      ptr = (unsigned char*)&float32;
+      float float32 = (float) CFP->getValueAPF().convertToFloat();
+      ptr = (unsigned char *)&float32;
       aggBuffer->addBytes(ptr, 4, Bytes);
     } else if (Ty == Type::getDoubleTy(CPV->getContext())) {
       double float64 = CFP->getValueAPF().convertToDouble();
-      ptr = (unsigned char*)&float64;
+      ptr = (unsigned char *)&float64;
       aggBuffer->addBytes(ptr, 8, Bytes);
-    }
-    else {
+    } else {
       llvm_unreachable("unsupported fp const type");
     }
     break;
@@ -1859,8 +1876,7 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
   case Type::PointerTyID: {
     if (GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
       aggBuffer->addSymbol(GVar);
-    }
-    else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
+    } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
       Value *v = Cexpr->stripPointerCasts();
       aggBuffer->addSymbol(v);
     }
@@ -1876,10 +1892,9 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes,
         isa<ConstantStruct>(CPV)) {
       int ElementSize = TD->getTypeAllocSize(CPV->getType());
       bufferAggregateConstant(CPV, aggBuffer);
-      if ( Bytes > ElementSize )
-        aggBuffer->addZeros(Bytes-ElementSize);
-    }
-    else if (isa<ConstantAggregateZero>(CPV))
+      if (Bytes > ElementSize)
+        aggBuffer->addZeros(Bytes - ElementSize);
+    } else if (isa<ConstantAggregateZero>(CPV))
       aggBuffer->addZeros(Bytes);
     else
       llvm_unreachable("Unexpected Constant type");
@@ -1905,7 +1920,7 @@ void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV,
   }
 
   if (const ConstantDataSequential *CDS =
-      dyn_cast<ConstantDataSequential>(CPV)) {
+          dyn_cast<ConstantDataSequential>(CPV)) {
     if (CDS->getNumElements())
       for (unsigned i = 0; i < CDS->getNumElements(); ++i)
         bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
@@ -1913,20 +1928,18 @@ void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV,
     return;
   }
 
-
   if (isa<ConstantStruct>(CPV)) {
     if (CPV->getNumOperands()) {
       StructType *ST = cast<StructType>(CPV->getType());
       for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
-        if ( i == (e - 1))
+        if (i == (e - 1))
           Bytes = TD->getStructLayout(ST)->getElementOffset(0) +
-          TD->getTypeAllocSize(ST)
-          - TD->getStructLayout(ST)->getElementOffset(i);
+                  TD->getTypeAllocSize(ST) -
+                  TD->getStructLayout(ST)->getElementOffset(i);
         else
-          Bytes = TD->getStructLayout(ST)->getElementOffset(i+1) -
-          TD->getStructLayout(ST)->getElementOffset(i);
-        bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes,
-                     aggBuffer);
+          Bytes = TD->getStructLayout(ST)->getElementOffset(i + 1) -
+                  TD->getStructLayout(ST)->getElementOffset(i);
+        bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer);
       }
     }
     return;
@@ -1937,15 +1950,13 @@ void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV,
 // buildTypeNameMap - Run through symbol table looking for type names.
 //
 
-
 bool NVPTXAsmPrinter::isImageType(const Type *Ty) {
 
   std::map<const Type *, std::string>::iterator PI = TypeNameMap.find(Ty);
 
-  if (PI != TypeNameMap.end() &&
-      (!PI->second.compare("struct._image1d_t") ||
-          !PI->second.compare("struct._image2d_t") ||
-          !PI->second.compare("struct._image3d_t")))
+  if (PI != TypeNameMap.end() && (!PI->second.compare("struct._image1d_t") ||
+                                  !PI->second.compare("struct._image2d_t") ||
+                                  !PI->second.compare("struct._image3d_t")))
     return true;
 
   return false;
@@ -1955,10 +1966,10 @@ bool NVPTXAsmPrinter::isImageType(const Type *Ty) {
 ///
 bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
                                       unsigned AsmVariant,
-                                      const char *ExtraCode,
-                                      raw_ostream &O) {
+                                      const char *ExtraCode, raw_ostream &O) {
   if (ExtraCode && ExtraCode[0]) {
-    if (ExtraCode[1] != 0) return true; // Unknown modifier.
+    if (ExtraCode[1] != 0)
+      return true; // Unknown modifier.
 
     switch (ExtraCode[0]) {
     default:
@@ -1974,13 +1985,11 @@ bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
   return false;
 }
 
-bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
-                                            unsigned OpNo,
-                                            unsigned AsmVariant,
-                                            const char *ExtraCode,
-                                            raw_ostream &O) {
+bool NVPTXAsmPrinter::PrintAsmMemoryOperand(
+    const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant,
+    const char *ExtraCode, raw_ostream &O) {
   if (ExtraCode && ExtraCode[0])
-    return true;  // Unknown modifier
+    return true; // Unknown modifier
 
   O << '[';
   printMemOperand(MI, OpNo, O);
@@ -1989,41 +1998,69 @@ bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
   return false;
 }
 
-bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI)
-{
-  switch(MI.getOpcode()) {
+bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) {
+  switch (MI.getOpcode()) {
   default:
     return false;
-  case NVPTX::CallArgBeginInst:  case NVPTX::CallArgEndInst0:
-  case NVPTX::CallArgEndInst1:  case NVPTX::CallArgF32:
-  case NVPTX::CallArgF64:  case NVPTX::CallArgI16:
-  case NVPTX::CallArgI32:  case NVPTX::CallArgI32imm:
-  case NVPTX::CallArgI64:  case NVPTX::CallArgI8:
-  case NVPTX::CallArgParam:  case NVPTX::CallVoidInst:
-  case NVPTX::CallVoidInstReg:  case NVPTX::Callseq_End:
+  case NVPTX::CallArgBeginInst:
+  case NVPTX::CallArgEndInst0:
+  case NVPTX::CallArgEndInst1:
+  case NVPTX::CallArgF32:
+  case NVPTX::CallArgF64:
+  case NVPTX::CallArgI16:
+  case NVPTX::CallArgI32:
+  case NVPTX::CallArgI32imm:
+  case NVPTX::CallArgI64:
+  case NVPTX::CallArgI8:
+  case NVPTX::CallArgParam:
+  case NVPTX::CallVoidInst:
+  case NVPTX::CallVoidInstReg:
+  case NVPTX::Callseq_End:
   case NVPTX::CallVoidInstReg64:
-  case NVPTX::DeclareParamInst:  case NVPTX::DeclareRetMemInst:
-  case NVPTX::DeclareRetRegInst:  case NVPTX::DeclareRetScalarInst:
-  case NVPTX::DeclareScalarParamInst:  case NVPTX::DeclareScalarRegInst:
-  case NVPTX::StoreParamF32:  case NVPTX::StoreParamF64:
-  case NVPTX::StoreParamI16:  case NVPTX::StoreParamI32:
-  case NVPTX::StoreParamI64:  case NVPTX::StoreParamI8:
-  case NVPTX::StoreParamS32I8:  case NVPTX::StoreParamU32I8:
-  case NVPTX::StoreParamS32I16:  case NVPTX::StoreParamU32I16:
-  case NVPTX::StoreRetvalF32:  case NVPTX::StoreRetvalF64:
-  case NVPTX::StoreRetvalI16:  case NVPTX::StoreRetvalI32:
-  case NVPTX::StoreRetvalI64:  case NVPTX::StoreRetvalI8:
-  case NVPTX::LastCallArgF32:  case NVPTX::LastCallArgF64:
-  case NVPTX::LastCallArgI16:  case NVPTX::LastCallArgI32:
-  case NVPTX::LastCallArgI32imm:  case NVPTX::LastCallArgI64:
-  case NVPTX::LastCallArgI8:  case NVPTX::LastCallArgParam:
-  case NVPTX::LoadParamMemF32:  case NVPTX::LoadParamMemF64:
-  case NVPTX::LoadParamMemI16:  case NVPTX::LoadParamMemI32:
-  case NVPTX::LoadParamMemI64:  case NVPTX::LoadParamMemI8:
-  case NVPTX::LoadParamRegF32:  case NVPTX::LoadParamRegF64:
-  case NVPTX::LoadParamRegI16:  case NVPTX::LoadParamRegI32:
-  case NVPTX::LoadParamRegI64:  case NVPTX::LoadParamRegI8:
-  case NVPTX::PrototypeInst:   case NVPTX::DBG_VALUE:
+  case NVPTX::DeclareParamInst:
+  case NVPTX::DeclareRetMemInst:
+  case NVPTX::DeclareRetRegInst:
+  case NVPTX::DeclareRetScalarInst:
+  case NVPTX::DeclareScalarParamInst:
+  case NVPTX::DeclareScalarRegInst:
+  case NVPTX::StoreParamF32:
+  case NVPTX::StoreParamF64:
+  case NVPTX::StoreParamI16:
+  case NVPTX::StoreParamI32:
+  case NVPTX::StoreParamI64:
+  case NVPTX::StoreParamI8:
+  case NVPTX::StoreParamS32I8:
+  case NVPTX::StoreParamU32I8:
+  case NVPTX::StoreParamS32I16:
+  case NVPTX::StoreParamU32I16:
+  case NVPTX::StoreRetvalF32:
+  case NVPTX::StoreRetvalF64:
+  case NVPTX::StoreRetvalI16:
+  case NVPTX::StoreRetvalI32:
+  case NVPTX::StoreRetvalI64:
+  case NVPTX::StoreRetvalI8:
+  case NVPTX::LastCallArgF32:
+  case NVPTX::LastCallArgF64:
+  case NVPTX::LastCallArgI16:
+  case NVPTX::LastCallArgI32:
+  case NVPTX::LastCallArgI32imm:
+  case NVPTX::LastCallArgI64:
+  case NVPTX::LastCallArgI8:
+  case NVPTX::LastCallArgParam:
+  case NVPTX::LoadParamMemF32:
+  case NVPTX::LoadParamMemF64:
+  case NVPTX::LoadParamMemI16:
+  case NVPTX::LoadParamMemI32:
+  case NVPTX::LoadParamMemI64:
+  case NVPTX::LoadParamMemI8:
+  case NVPTX::LoadParamRegF32:
+  case NVPTX::LoadParamRegF64:
+  case NVPTX::LoadParamRegI16:
+  case NVPTX::LoadParamRegI32:
+  case NVPTX::LoadParamRegI64:
+  case NVPTX::LoadParamRegI8:
+  case NVPTX::PrototypeInst:
+  case NVPTX::DBG_VALUE:
     return true;
   }
   return false;
@@ -2035,10 +2072,9 @@ extern "C" void LLVMInitializeNVPTXBackendAsmPrinter() {
   RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64);
 }
 
-
 void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) {
   std::stringstream temp;
-  LineReader * reader = this->getReader(filename.str());
+  LineReader *reader = this->getReader(filename.str());
   temp << "\n//";
   temp << filename.str();
   temp << ":";
@@ -2049,29 +2085,26 @@ void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) {
   this->OutStreamer.EmitRawText(Twine(temp.str()));
 }
 
-
 LineReader *NVPTXAsmPrinter::getReader(std::string filename) {
-  if (reader == NULL)  {
-    reader =  new LineReader(filename);
+  if (reader == NULL) {
+    reader = new LineReader(filename);
   }
 
   if (reader->fileName() != filename) {
     delete reader;
-    reader =  new LineReader(filename);
+    reader = new LineReader(filename);
   }
 
   return reader;
 }
 
-
-std::string
-LineReader::readLine(unsigned lineNum) {
+std::string LineReader::readLine(unsigned lineNum) {
   if (lineNum < theCurLine) {
     theCurLine = 0;
-    fstr.seekg(0,std::ios::beg);
+    fstr.seekg(0, std::ios::beg);
   }
   while (theCurLine < lineNum) {
-    fstr.getline(buff,500);
+    fstr.getline(buff, 500);
     theCurLine++;
   }
   return buff;
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h
index 42498f0bf7..6dc9fc0ffe 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -43,15 +43,15 @@
 // This is defined in AsmPrinter.cpp.
 // Used to process the constant expressions in initializers.
 namespace nvptx {
-const llvm::MCExpr *LowerConstant(const llvm::Constant *CV,
-                                  llvm::AsmPrinter &AP) ;
+const llvm::MCExpr *
+LowerConstant(const llvm::Constant *CV, llvm::AsmPrinter &AP);
 }
 
 namespace llvm {
 
 class LineReader {
 private:
-  unsigned theCurLine ;
+  unsigned theCurLine;
   std::ifstream fstr;
   char buff[512];
   std::string theFileName;
@@ -63,17 +63,12 @@ public:
     theFileName = filename;
   }
   std::string fileName() { return theFileName; }
-  ~LineReader() {
-    fstr.close();
-  }
+  ~LineReader() { fstr.close(); }
   std::string readLine(unsigned line);
 };
 
-
-
 class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
 
-
   class AggBuffer {
     // Used to buffer the emitted string for initializing global
     // aggregates.
@@ -92,7 +87,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
     // Once we have this AggBuffer setup, we can choose how to print
     // it out.
   public:
-    unsigned size;   // size of the buffer in bytes
+    unsigned size;         // size of the buffer in bytes
     unsigned char *buffer; // the buffer
     unsigned numSymbols;   // number of symbol addresses
     SmallVector<unsigned, 4> symbolPosInBuffer;
@@ -105,33 +100,31 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
 
   public:
     AggBuffer(unsigned _size, raw_ostream &_O, NVPTXAsmPrinter &_AP)
-    :O(_O),AP(_AP) {
+        : O(_O), AP(_AP) {
       buffer = new unsigned char[_size];
       size = _size;
       curpos = 0;
       numSymbols = 0;
     }
-    ~AggBuffer() {
-      delete [] buffer;
-    }
+    ~AggBuffer() { delete[] buffer; }
     unsigned addBytes(unsigned char *Ptr, int Num, int Bytes) {
-      assert((curpos+Num) <= size);
-      assert((curpos+Bytes) <= size);
-      for ( int i= 0; i < Num; ++i) {
+      assert((curpos + Num) <= size);
+      assert((curpos + Bytes) <= size);
+      for (int i = 0; i < Num; ++i) {
         buffer[curpos] = Ptr[i];
-        curpos ++;
+        curpos++;
       }
-      for ( int i=Num; i < Bytes ; ++i) {
+      for (int i = Num; i < Bytes; ++i) {
         buffer[curpos] = 0;
-        curpos ++;
+        curpos++;
       }
       return curpos;
     }
     unsigned addZeros(int Num) {
-      assert((curpos+Num) <= size);
-      for ( int i= 0; i < Num; ++i) {
+      assert((curpos + Num) <= size);
+      for (int i = 0; i < Num; ++i) {
         buffer[curpos] = 0;
-        curpos ++;
+        curpos++;
       }
       return curpos;
     }
@@ -143,10 +136,10 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
     void print() {
       if (numSymbols == 0) {
         // print out in bytes
-        for (unsigned i=0; i<size; i++) {
+        for (unsigned i = 0; i < size; i++) {
           if (i)
             O << ", ";
-          O << (unsigned int)buffer[i];
+          O << (unsigned int) buffer[i];
         }
       } else {
         // print out in 4-bytes or 8-bytes
@@ -156,7 +149,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
         unsigned int nBytes = 4;
         if (AP.nvptxSubtarget.is64Bit())
           nBytes = 8;
-        for (pos=0; pos<size; pos+=nBytes) {
+        for (pos = 0; pos < size; pos += nBytes) {
           if (pos)
             O << ", ";
           if (pos == nextSymbolPos) {
@@ -164,22 +157,19 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
             if (GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
               MCSymbol *Name = AP.Mang->getSymbol(GVar);
               O << *Name;
-            }
-            else if (ConstantExpr *Cexpr =
-                dyn_cast<ConstantExpr>(v)) {
+            } else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(v)) {
               O << *nvptx::LowerConstant(Cexpr, AP);
             } else
               llvm_unreachable("symbol type unknown");
             nSym++;
             if (nSym >= numSymbols)
-              nextSymbolPos = size+1;
+              nextSymbolPos = size + 1;
             else
               nextSymbolPos = symbolPosInBuffer[nSym];
-          } else
-            if (nBytes == 4)
-              O << *(unsigned int*)(buffer+pos);
-            else
-              O << *(unsigned long long*)(buffer+pos);
+          } else if (nBytes == 4)
+            O << *(unsigned int *)(buffer + pos);
+          else
+            O << *(unsigned long long *)(buffer + pos);
         }
       }
     }
@@ -189,10 +179,8 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
 
   virtual void emitSrcInText(StringRef filename, unsigned line);
 
-private :
-  virtual const char *getPassName() const {
-    return "NVPTX Assembly Printer";
-  }
+private:
+  virtual const char *getPassName() const { return "NVPTX Assembly Printer"; }
 
   const Function *F;
   std::string CurrentFnName;
@@ -207,31 +195,28 @@ private :
 
   void printGlobalVariable(const GlobalVariable *GVar);
   void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
-                    const char *Modifier=0);
+                    const char *Modifier = 0);
   void printLdStCode(const MachineInstr *MI, int opNum, raw_ostream &O,
-                     const char *Modifier=0);
-  void printVecModifiedImmediate(const MachineOperand &MO,
-                                 const char *Modifier, raw_ostream &O);
+                     const char *Modifier = 0);
+  void printVecModifiedImmediate(const MachineOperand &MO, const char *Modifier,
+                                 raw_ostream &O);
   void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
-                       const char *Modifier=0);
+                       const char *Modifier = 0);
   void printImplicitDef(const MachineInstr *MI, raw_ostream &O) const;
   // definition autogenerated.
   void printInstruction(const MachineInstr *MI, raw_ostream &O);
-  void printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O,
-                          bool=false);
+  void printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O, bool = false);
   void printParamName(int paramIndex, raw_ostream &O);
   void printParamName(Function::const_arg_iterator I, int paramIndex,
                       raw_ostream &O);
   void emitHeader(Module &M, raw_ostream &O);
-  void emitKernelFunctionDirectives(const Function& F,
-                                    raw_ostream &O) const;
+  void emitKernelFunctionDirectives(const Function &F, raw_ostream &O) const;
   void emitVirtualRegister(unsigned int vr, bool isVec, raw_ostream &O);
   void emitFunctionExternParamList(const MachineFunction &MF);
   void emitFunctionParamList(const Function *, raw_ostream &O);
   void emitFunctionParamList(const MachineFunction &MF, raw_ostream &O);
   void setAndEmitFunctionVirtualRegisters(const MachineFunction &MF);
-  void emitFunctionTempData(const MachineFunction &MF,
-                            unsigned &FrameSize);
+  void emitFunctionTempData(const MachineFunction &MF, unsigned &FrameSize);
   bool isImageType(const Type *Ty);
   bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
                        unsigned AsmVariant, const char *ExtraCode,
@@ -269,17 +254,16 @@ private:
   void recordAndEmitFilenames(Module &);
 
   void emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O);
-  void emitPTXAddressSpace(unsigned int AddressSpace,
-                           raw_ostream &O) const;
-  std::string getPTXFundamentalTypeStr(const Type *Ty, bool=true) const ;
-  void printScalarConstant(Constant *CPV, raw_ostream &O) ;
-  void printFPConstant(const ConstantFP *Fp, raw_ostream &O) ;
-  void bufferLEByte(Constant *CPV, int Bytes, AggBuffer *aggBuffer) ;
-  void bufferAggregateConstant(Constant *CV, AggBuffer *aggBuffer) ;
+  void emitPTXAddressSpace(unsigned int AddressSpace, raw_ostream &O) const;
+  std::string getPTXFundamentalTypeStr(const Type *Ty, bool = true) const;
+  void printScalarConstant(Constant *CPV, raw_ostream &O);
+  void printFPConstant(const ConstantFP *Fp, raw_ostream &O);
+  void bufferLEByte(Constant *CPV, int Bytes, AggBuffer *aggBuffer);
+  void bufferAggregateConstant(Constant *CV, AggBuffer *aggBuffer);
 
   void printOperandProper(const MachineOperand &MO);
 
-  void emitLinkageDirective(const GlobalValue* V, raw_ostream &O);
+  void emitLinkageDirective(const GlobalValue *V, raw_ostream &O);
   void emitDeclarations(Module &, raw_ostream &O);
   void emitDeclaration(const Function *, raw_ostream &O);
 
@@ -289,10 +273,9 @@ private:
   LineReader *reader;
   LineReader *getReader(std::string);
 public:
-  NVPTXAsmPrinter(TargetMachine &TM,
-                  MCStreamer &Streamer)
-  : AsmPrinter(TM, Streamer),
-    nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
+  NVPTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+      : AsmPrinter(TM, Streamer),
+        nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
     CurrentBankselLabelInBasicBlock = "";
     VRidGlobal2LocalMap = NULL;
     reader = NULL;
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
index bb2c55ceed..6533da5102 100644
--- a/lib/Target/NVPTX/NVPTXFrameLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
@@ -25,9 +25,7 @@
 
 using namespace llvm;
 
-bool NVPTXFrameLowering::hasFP(const MachineFunction &MF) const {
-  return true;
-}
+bool NVPTXFrameLowering::hasFP(const MachineFunction &MF) const { return true; }
 
 void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const {
   if (MF.getFrameInfo()->hasStackObjects()) {
@@ -42,46 +40,39 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const {
       // mov %SPL, %depot;
       // cvta.local %SP, %SPL;
       if (is64bit) {
-        MachineInstr *MI = BuildMI(MBB, MBBI, dl,
-                               tm.getInstrInfo()->get(NVPTX::cvta_local_yes_64),
-                                   NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal);
-        BuildMI(MBB, MI, dl,
-                tm.getInstrInfo()->get(NVPTX::IMOV64rr), NVPTX::VRFrameLocal)
-        .addReg(NVPTX::VRDepot);
+        MachineInstr *MI = BuildMI(
+            MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::cvta_local_yes_64),
+            NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal);
+        BuildMI(MBB, MI, dl, tm.getInstrInfo()->get(NVPTX::IMOV64rr),
+                NVPTX::VRFrameLocal).addReg(NVPTX::VRDepot);
       } else {
-        MachineInstr *MI = BuildMI(MBB, MBBI, dl,
-                                  tm.getInstrInfo()->get(NVPTX::cvta_local_yes),
-                                   NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal);
-        BuildMI(MBB, MI, dl,
-                tm.getInstrInfo()->get(NVPTX::IMOV32rr), NVPTX::VRFrameLocal)
-        .addReg(NVPTX::VRDepot);
+        MachineInstr *MI = BuildMI(
+            MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::cvta_local_yes),
+            NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal);
+        BuildMI(MBB, MI, dl, tm.getInstrInfo()->get(NVPTX::IMOV32rr),
+                NVPTX::VRFrameLocal).addReg(NVPTX::VRDepot);
       }
-    }
-    else {
+    } else {
       // mov %SP, %depot;
       if (is64bit)
-        BuildMI(MBB, MBBI, dl,
-                tm.getInstrInfo()->get(NVPTX::IMOV64rr), NVPTX::VRFrame)
-                .addReg(NVPTX::VRDepot);
+        BuildMI(MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::IMOV64rr),
+                NVPTX::VRFrame).addReg(NVPTX::VRDepot);
       else
-        BuildMI(MBB, MBBI, dl,
-                tm.getInstrInfo()->get(NVPTX::IMOV32rr), NVPTX::VRFrame)
-                .addReg(NVPTX::VRDepot);
+        BuildMI(MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::IMOV32rr),
+                NVPTX::VRFrame).addReg(NVPTX::VRDepot);
     }
   }
 }
 
 void NVPTXFrameLowering::emitEpilogue(MachineFunction &MF,
-                                      MachineBasicBlock &MBB) const {
-}
+                                      MachineBasicBlock &MBB) const {}
 
 // This function eliminates ADJCALLSTACKDOWN,
 // ADJCALLSTACKUP pseudo instructions
-void NVPTXFrameLowering::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
-                              MachineBasicBlock::iterator I) const {
+void NVPTXFrameLowering::eliminateCallFramePseudoInstr(
+    MachineFunction &MF, MachineBasicBlock &MBB,
+    MachineBasicBlock::iterator I) const {
   // Simply discard ADJCALLSTACKDOWN,
   // ADJCALLSTACKUP instructions.
   MBB.erase(I);
 }
-
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.h b/lib/Target/NVPTX/NVPTXFrameLowering.h
index d34e7bec1d..819f1dd3f4 100644
--- a/lib/Target/NVPTX/NVPTXFrameLowering.h
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.h
@@ -16,7 +16,6 @@
 
 #include "llvm/Target/TargetFrameLowering.h"
 
-
 namespace llvm {
 class NVPTXTargetMachine;
 
@@ -26,13 +25,12 @@ class NVPTXFrameLowering : public TargetFrameLowering {
 
 public:
   explicit NVPTXFrameLowering(NVPTXTargetMachine &_tm, bool _is64bit)
-  : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0),
-    tm(_tm), is64bit(_is64bit) {}
+      : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0), tm(_tm),
+        is64bit(_is64bit) {}
 
   virtual bool hasFP(const MachineFunction &MF) const;
   virtual void emitPrologue(MachineFunction &MF) const;
-  virtual void emitEpilogue(MachineFunction &MF,
-                            MachineBasicBlock &MBB) const;
+  virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
 
   void eliminateCallFramePseudoInstr(MachineFunction &MF,
                                      MachineBasicBlock &MBB,
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 481f13afd1..e862988c85 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -11,7 +11,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-
 #include "NVPTXISelDAGToDAG.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/Instructions.h"
@@ -26,27 +25,22 @@
 
 using namespace llvm;
 
-
-static cl::opt<bool>
-UseFMADInstruction("nvptx-mad-enable",
-                   cl::ZeroOrMore,
-                cl::desc("NVPTX Specific: Enable generating FMAD instructions"),
-                   cl::init(false));
+static cl::opt<bool> UseFMADInstruction(
+    "nvptx-mad-enable", cl::ZeroOrMore,
+    cl::desc("NVPTX Specific: Enable generating FMAD instructions"),
+    cl::init(false));
 
 static cl::opt<int>
-FMAContractLevel("nvptx-fma-level",
-                 cl::ZeroOrMore,
+FMAContractLevel("nvptx-fma-level", cl::ZeroOrMore,
                  cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
-                     " 1: do it  2: do it aggressively"),
-                     cl::init(2));
-
+                          " 1: do it  2: do it aggressively"),
+                 cl::init(2));
 
-static cl::opt<int>
-UsePrecDivF32("nvptx-prec-divf32",
-              cl::ZeroOrMore,
-             cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
-                  " IEEE Compliant F32 div.rnd if avaiable."),
-                  cl::init(2));
+static cl::opt<int> UsePrecDivF32(
+    "nvptx-prec-divf32", cl::ZeroOrMore,
+    cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
+             " IEEE Compliant F32 div.rnd if avaiable."),
+    cl::init(2));
 
 /// createNVPTXISelDag - This pass converts a legalized DAG into a
 /// NVPTX-specific DAG, ready for instruction scheduling.
@@ -55,26 +49,22 @@ FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
   return new NVPTXDAGToDAGISel(TM, OptLevel);
 }
 
-
 NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
                                      CodeGenOpt::Level OptLevel)
-: SelectionDAGISel(tm, OptLevel),
-  Subtarget(tm.getSubtarget<NVPTXSubtarget>())
-{
+    : SelectionDAGISel(tm, OptLevel),
+      Subtarget(tm.getSubtarget<NVPTXSubtarget>()) {
   // Always do fma.f32 fpcontract if the target supports the instruction.
   // Always do fma.f64 fpcontract if the target supports the instruction.
   // Do mad.f32 is nvptx-mad-enable is specified and the target does not
   // support fma.f32.
 
   doFMADF32 = (OptLevel > 0) && UseFMADInstruction && !Subtarget.hasFMAF32();
-  doFMAF32 =  (OptLevel > 0) && Subtarget.hasFMAF32() &&
-      (FMAContractLevel>=1);
-  doFMAF64 =  (OptLevel > 0) && Subtarget.hasFMAF64() &&
-      (FMAContractLevel>=1);
-  doFMAF32AGG =  (OptLevel > 0) && Subtarget.hasFMAF32() &&
-      (FMAContractLevel==2);
-  doFMAF64AGG =  (OptLevel > 0) && Subtarget.hasFMAF64() &&
-      (FMAContractLevel==2);
+  doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel >= 1);
+  doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel >= 1);
+  doFMAF32AGG =
+      (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel == 2);
+  doFMAF64AGG =
+      (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel == 2);
 
   allowFMA = (FMAContractLevel >= 1) || UseFMADInstruction;
 
@@ -92,10 +82,10 @@ NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
 
 /// Select - Select instructions not customized! Used for
 /// expanded, promoted and normal instructions.
-SDNode* NVPTXDAGToDAGISel::Select(SDNode *N) {
+SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
 
   if (N->isMachineOpcode())
-    return NULL;   // Already selected.
+    return NULL; // Already selected.
 
   SDNode *ResNode = NULL;
   switch (N->getOpcode()) {
@@ -119,30 +109,34 @@ SDNode* NVPTXDAGToDAGISel::Select(SDNode *N) {
   case NVPTXISD::StoreV4:
     ResNode = SelectStoreVector(N);
     break;
-  default: break;
+  default:
+    break;
   }
   if (ResNode)
     return ResNode;
   return SelectCode(N);
 }
 
-
-static unsigned int
-getCodeAddrSpace(MemSDNode *N, const NVPTXSubtarget &Subtarget)
-{
+static unsigned int getCodeAddrSpace(MemSDNode *N,
+                                     const NVPTXSubtarget &Subtarget) {
   const Value *Src = N->getSrcValue();
   if (!Src)
     return NVPTX::PTXLdStInstCode::LOCAL;
 
   if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
     switch (PT->getAddressSpace()) {
-    case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
-    case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
-    case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
+    case llvm::ADDRESS_SPACE_LOCAL:
+      return NVPTX::PTXLdStInstCode::LOCAL;
+    case llvm::ADDRESS_SPACE_GLOBAL:
+      return NVPTX::PTXLdStInstCode::GLOBAL;
+    case llvm::ADDRESS_SPACE_SHARED:
+      return NVPTX::PTXLdStInstCode::SHARED;
     case llvm::ADDRESS_SPACE_CONST_NOT_GEN:
       return NVPTX::PTXLdStInstCode::CONSTANT;
-    case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
-    case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
+    case llvm::ADDRESS_SPACE_GENERIC:
+      return NVPTX::PTXLdStInstCode::GENERIC;
+    case llvm::ADDRESS_SPACE_PARAM:
+      return NVPTX::PTXLdStInstCode::PARAM;
     case llvm::ADDRESS_SPACE_CONST:
       // If the arch supports generic address space, translate it to GLOBAL
       // for correctness.
@@ -153,18 +147,18 @@ getCodeAddrSpace(MemSDNode *N, const NVPTXSubtarget &Subtarget)
         return NVPTX::PTXLdStInstCode::GLOBAL;
       else
         return NVPTX::PTXLdStInstCode::CONSTANT;
-    default: break;
+    default:
+      break;
     }
   }
   return NVPTX::PTXLdStInstCode::LOCAL;
 }
 
-
-SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
+SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
   DebugLoc dl = N->getDebugLoc();
   LoadSDNode *LD = cast<LoadSDNode>(N);
   EVT LoadedVT = LD->getMemoryVT();
-  SDNode *NVPTXLD= NULL;
+  SDNode *NVPTXLD = NULL;
 
   // do not support pre/post inc/dec
   if (LD->isIndexed())
@@ -204,7 +198,7 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
   //          type is integer
   // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
   MVT ScalarVT = SimpleVT.getScalarType();
-  unsigned fromTypeWidth =  ScalarVT.getSizeInBits();
+  unsigned fromTypeWidth = ScalarVT.getSizeInBits();
   unsigned int fromType;
   if ((LD->getExtensionType() == ISD::SEXTLOAD))
     fromType = NVPTX::PTXLdStInstCode::Signed;
@@ -223,105 +217,166 @@ SDNode* NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
 
   if (SelectDirectAddr(N1, Addr)) {
     switch (TargetVT) {
-    case MVT::i8:    Opcode = NVPTX::LD_i8_avar; break;
-    case MVT::i16:   Opcode = NVPTX::LD_i16_avar; break;
-    case MVT::i32:   Opcode = NVPTX::LD_i32_avar; break;
-    case MVT::i64:   Opcode = NVPTX::LD_i64_avar; break;
-    case MVT::f32:   Opcode = NVPTX::LD_f32_avar; break;
-    case MVT::f64:   Opcode = NVPTX::LD_f64_avar; break;
-    default: return NULL;
+    case MVT::i8:
+      Opcode = NVPTX::LD_i8_avar;
+      break;
+    case MVT::i16:
+      Opcode = NVPTX::LD_i16_avar;
+      break;
+    case MVT::i32:
+      Opcode = NVPTX::LD_i32_avar;
+      break;
+    case MVT::i64:
+      Opcode = NVPTX::LD_i64_avar;
+      break;
+    case MVT::f32:
+      Opcode = NVPTX::LD_f32_avar;
+      break;
+    case MVT::f64:
+      Opcode = NVPTX::LD_f64_avar;
+      break;
+    default:
+      return NULL;
     }
-    SDValue Ops[] = { getI32Imm(isVolatile),
-                      getI32Imm(codeAddrSpace),
-                      getI32Imm(vecType),
-                      getI32Imm(fromType),
-                      getI32Imm(fromTypeWidth),
-                      Addr, Chain };
-    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
-                                     MVT::Other, Ops, 7);
-  } else if (Subtarget.is64Bit()?
-      SelectADDRsi64(N1.getNode(), N1, Base, Offset):
-      SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
+    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+                      getI32Imm(vecType), getI32Imm(fromType),
+                      getI32Imm(fromTypeWidth), Addr, Chain };
+    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 7);
+  } else if (Subtarget.is64Bit()
+                 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
+                 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
     switch (TargetVT) {
-    case MVT::i8:    Opcode = NVPTX::LD_i8_asi; break;
-    case MVT::i16:   Opcode = NVPTX::LD_i16_asi; break;
-    case MVT::i32:   Opcode = NVPTX::LD_i32_asi; break;
-    case MVT::i64:   Opcode = NVPTX::LD_i64_asi; break;
-    case MVT::f32:   Opcode = NVPTX::LD_f32_asi; break;
-    case MVT::f64:   Opcode = NVPTX::LD_f64_asi; break;
-    default: return NULL;
+    case MVT::i8:
+      Opcode = NVPTX::LD_i8_asi;
+      break;
+    case MVT::i16:
+      Opcode = NVPTX::LD_i16_asi;
+      break;
+    case MVT::i32:
+      Opcode = NVPTX::LD_i32_asi;
+      break;
+    case MVT::i64:
+      Opcode = NVPTX::LD_i64_asi;
+      break;
+    case MVT::f32:
+      Opcode = NVPTX::LD_f32_asi;
+      break;
+    case MVT::f64:
+      Opcode = NVPTX::LD_f64_asi;
+      break;
+    default:
+      return NULL;
     }
-    SDValue Ops[] = { getI32Imm(isVolatile),
-                      getI32Imm(codeAddrSpace),
-                      getI32Imm(vecType),
-                      getI32Imm(fromType),
-                      getI32Imm(fromTypeWidth),
-                      Base, Offset, Chain };
-    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
-                                     MVT::Other, Ops, 8);
-  } else if (Subtarget.is64Bit()?
-      SelectADDRri64(N1.getNode(), N1, Base, Offset):
-      SelectADDRri(N1.getNode(), N1, Base, Offset)) {
+    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+                      getI32Imm(vecType), getI32Imm(fromType),
+                      getI32Imm(fromTypeWidth), Base, Offset, Chain };
+    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 8);
+  } else if (Subtarget.is64Bit()
+                 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
+                 : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
     if (Subtarget.is64Bit()) {
       switch (TargetVT) {
-      case MVT::i8:    Opcode = NVPTX::LD_i8_ari_64; break;
-      case MVT::i16:   Opcode = NVPTX::LD_i16_ari_64; break;
-      case MVT::i32:   Opcode = NVPTX::LD_i32_ari_64; break;
-      case MVT::i64:   Opcode = NVPTX::LD_i64_ari_64; break;
-      case MVT::f32:   Opcode = NVPTX::LD_f32_ari_64; break;
-      case MVT::f64:   Opcode = NVPTX::LD_f64_ari_64; break;
-      default: return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::LD_i8_ari_64;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::LD_i16_ari_64;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::LD_i32_ari_64;
+        break;
+      case MVT::i64:
+        Opcode = NVPTX::LD_i64_ari_64;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::LD_f32_ari_64;
+        break;
+      case MVT::f64:
+        Opcode = NVPTX::LD_f64_ari_64;
+        break;
+      default:
+        return NULL;
       }
     } else {
       switch (TargetVT) {
-      case MVT::i8:    Opcode = NVPTX::LD_i8_ari; break;
-      case MVT::i16:   Opcode = NVPTX::LD_i16_ari; break;
-      case MVT::i32:   Opcode = NVPTX::LD_i32_ari; break;
-      case MVT::i64:   Opcode = NVPTX::LD_i64_ari; break;
-      case MVT::f32:   Opcode = NVPTX::LD_f32_ari; break;
-      case MVT::f64:   Opcode = NVPTX::LD_f64_ari; break;
-      default: return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::LD_i8_ari;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::LD_i16_ari;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::LD_i32_ari;
+        break;
+      case MVT::i64:
+        Opcode = NVPTX::LD_i64_ari;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::LD_f32_ari;
+        break;
+      case MVT::f64:
+        Opcode = NVPTX::LD_f64_ari;
+        break;
+      default:
+        return NULL;
       }
     }
-    SDValue Ops[] = { getI32Imm(isVolatile),
-                      getI32Imm(codeAddrSpace),
-                      getI32Imm(vecType),
-                      getI32Imm(fromType),
-                      getI32Imm(fromTypeWidth),
-                      Base, Offset, Chain };
-    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
-                                     MVT::Other, Ops, 8);
-  }
-  else {
+    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+                      getI32Imm(vecType), getI32Imm(fromType),
+                      getI32Imm(fromTypeWidth), Base, Offset, Chain };
+    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 8);
+  } else {
     if (Subtarget.is64Bit()) {
       switch (TargetVT) {
-      case MVT::i8:    Opcode = NVPTX::LD_i8_areg_64; break;
-      case MVT::i16:   Opcode = NVPTX::LD_i16_areg_64; break;
-      case MVT::i32:   Opcode = NVPTX::LD_i32_areg_64; break;
-      case MVT::i64:   Opcode = NVPTX::LD_i64_areg_64; break;
-      case MVT::f32:   Opcode = NVPTX::LD_f32_areg_64; break;
-      case MVT::f64:   Opcode = NVPTX::LD_f64_areg_64; break;
-      default: return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::LD_i8_areg_64;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::LD_i16_areg_64;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::LD_i32_areg_64;
+        break;
+      case MVT::i64:
+        Opcode = NVPTX::LD_i64_areg_64;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::LD_f32_areg_64;
+        break;
+      case MVT::f64:
+        Opcode = NVPTX::LD_f64_areg_64;
+        break;
+      default:
+        return NULL;
       }
     } else {
       switch (TargetVT) {
-      case MVT::i8:    Opcode = NVPTX::LD_i8_areg; break;
-      case MVT::i16:   Opcode = NVPTX::LD_i16_areg; break;
-      case MVT::i32:   Opcode = NVPTX::LD_i32_areg; break;
-      case MVT::i64:   Opcode = NVPTX::LD_i64_areg; break;
-      case MVT::f32:   Opcode = NVPTX::LD_f32_areg; break;
-      case MVT::f64:   Opcode = NVPTX::LD_f64_areg; break;
-      default: return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::LD_i8_areg;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::LD_i16_areg;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::LD_i32_areg;
+        break;
+      case MVT::i64:
+        Opcode = NVPTX::LD_i64_areg;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::LD_f32_areg;
+        break;
+      case MVT::f64:
+        Opcode = NVPTX::LD_f64_areg;
+        break;
+      default:
+        return NULL;
       }
     }
-    SDValue Ops[] = { getI32Imm(isVolatile),
-                      getI32Imm(codeAddrSpace),
-                      getI32Imm(vecType),
-                      getI32Imm(fromType),
-                      getI32Imm(fromTypeWidth),
-                      N1, Chain };
-    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT,
-                                     MVT::Other, Ops, 7);
+    SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+                      getI32Imm(vecType), getI32Imm(fromType),
+                      getI32Imm(fromTypeWidth), N1, Chain };
+    NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops, 7);
   }
 
   if (NVPTXLD != NULL) {
@@ -344,9 +399,8 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
   MemSDNode *MemSD = cast<MemSDNode>(N);
   EVT LoadedVT = MemSD->getMemoryVT();
 
-
   if (!LoadedVT.isSimple())
-     return NULL;
+    return NULL;
 
   // Address Space Setting
   unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
@@ -369,11 +423,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
   //          type is integer
   // Float  : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
   MVT ScalarVT = SimpleVT.getScalarType();
-  unsigned FromTypeWidth =  ScalarVT.getSizeInBits();
+  unsigned FromTypeWidth = ScalarVT.getSizeInBits();
   unsigned int FromType;
   // The last operand holds the original LoadSDNode::getExtensionType() value
-  unsigned ExtensionType =
-    cast<ConstantSDNode>(N->getOperand(N->getNumOperands()-1))->getZExtValue();
+  unsigned ExtensionType = cast<ConstantSDNode>(
+      N->getOperand(N->getNumOperands() - 1))->getZExtValue();
   if (ExtensionType == ISD::SEXTLOAD)
     FromType = NVPTX::PTXLdStInstCode::Signed;
   else if (ScalarVT.isFloatingPoint())
@@ -384,197 +438,328 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
   unsigned VecType;
 
   switch (N->getOpcode()) {
-  case NVPTXISD::LoadV2:  VecType = NVPTX::PTXLdStInstCode::V2; break;
-  case NVPTXISD::LoadV4:  VecType = NVPTX::PTXLdStInstCode::V4; break;
-  default: return NULL;
+  case NVPTXISD::LoadV2:
+    VecType = NVPTX::PTXLdStInstCode::V2;
+    break;
+  case NVPTXISD::LoadV4:
+    VecType = NVPTX::PTXLdStInstCode::V4;
+    break;
+  default:
+    return NULL;
   }
 
   EVT EltVT = N->getValueType(0);
 
   if (SelectDirectAddr(Op1, Addr)) {
     switch (N->getOpcode()) {
-    default: return NULL;
+    default:
+      return NULL;
     case NVPTXISD::LoadV2:
       switch (EltVT.getSimpleVT().SimpleTy) {
-      default: return NULL;
-      case MVT::i8:   Opcode = NVPTX::LDV_i8_v2_avar; break;
-      case MVT::i16:  Opcode = NVPTX::LDV_i16_v2_avar; break;
-      case MVT::i32:  Opcode = NVPTX::LDV_i32_v2_avar; break;
-      case MVT::i64:  Opcode = NVPTX::LDV_i64_v2_avar; break;
-      case MVT::f32:  Opcode = NVPTX::LDV_f32_v2_avar; break;
-      case MVT::f64:  Opcode = NVPTX::LDV_f64_v2_avar; break;
+      default:
+        return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::LDV_i8_v2_avar;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::LDV_i16_v2_avar;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::LDV_i32_v2_avar;
+        break;
+      case MVT::i64:
+        Opcode = NVPTX::LDV_i64_v2_avar;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::LDV_f32_v2_avar;
+        break;
+      case MVT::f64:
+        Opcode = NVPTX::LDV_f64_v2_avar;
+        break;
       }
       break;
     case NVPTXISD::LoadV4:
       switch (EltVT.getSimpleVT().SimpleTy) {
-      default: return NULL;
-      case MVT::i8:   Opcode = NVPTX::LDV_i8_v4_avar; break;
-      case MVT::i16:  Opcode = NVPTX::LDV_i16_v4_avar; break;
-      case MVT::i32:  Opcode = NVPTX::LDV_i32_v4_avar; break;
-      case MVT::f32:  Opcode = NVPTX::LDV_f32_v4_avar; break;
+      default:
+        return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::LDV_i8_v4_avar;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::LDV_i16_v4_avar;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::LDV_i32_v4_avar;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::LDV_f32_v4_avar;
+        break;
       }
       break;
     }
 
-    SDValue Ops[] = { getI32Imm(IsVolatile),
-                      getI32Imm(CodeAddrSpace),
-                      getI32Imm(VecType),
-                      getI32Imm(FromType),
-                      getI32Imm(FromTypeWidth),
-                      Addr, Chain };
+    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
+                      getI32Imm(VecType), getI32Imm(FromType),
+                      getI32Imm(FromTypeWidth), Addr, Chain };
     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7);
-  } else if (Subtarget.is64Bit()?
-             SelectADDRsi64(Op1.getNode(), Op1, Base, Offset):
-             SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
+  } else if (Subtarget.is64Bit()
+                 ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
+                 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
     switch (N->getOpcode()) {
-    default: return NULL;
+    default:
+      return NULL;
     case NVPTXISD::LoadV2:
       switch (EltVT.getSimpleVT().SimpleTy) {
-      default: return NULL;
-      case MVT::i8:   Opcode = NVPTX::LDV_i8_v2_asi; break;
-      case MVT::i16:  Opcode = NVPTX::LDV_i16_v2_asi; break;
-      case MVT::i32:  Opcode = NVPTX::LDV_i32_v2_asi; break;
-      case MVT::i64:  Opcode = NVPTX::LDV_i64_v2_asi; break;
-      case MVT::f32:  Opcode = NVPTX::LDV_f32_v2_asi; break;
-      case MVT::f64:  Opcode = NVPTX::LDV_f64_v2_asi; break;
+      default:
+        return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::LDV_i8_v2_asi;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::LDV_i16_v2_asi;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::LDV_i32_v2_asi;
+        break;
+      case MVT::i64:
+        Opcode = NVPTX::LDV_i64_v2_asi;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::LDV_f32_v2_asi;
+        break;
+      case MVT::f64:
+        Opcode = NVPTX::LDV_f64_v2_asi;
+        break;
       }
       break;
     case NVPTXISD::LoadV4:
       switch (EltVT.getSimpleVT().SimpleTy) {
-      default: return NULL;
-      case MVT::i8:   Opcode = NVPTX::LDV_i8_v4_asi; break;
-      case MVT::i16:  Opcode = NVPTX::LDV_i16_v4_asi; break;
-      case MVT::i32:  Opcode = NVPTX::LDV_i32_v4_asi; break;
-      case MVT::f32:  Opcode = NVPTX::LDV_f32_v4_asi; break;
+      default:
+        return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::LDV_i8_v4_asi;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::LDV_i16_v4_asi;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::LDV_i32_v4_asi;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::LDV_f32_v4_asi;
+        break;
       }
       break;
     }
 
-    SDValue Ops[] = { getI32Imm(IsVolatile),
-                      getI32Imm(CodeAddrSpace),
-                      getI32Imm(VecType),
-                      getI32Imm(FromType),
-                      getI32Imm(FromTypeWidth),
-                      Base, Offset, Chain };
+    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
+                      getI32Imm(VecType), getI32Imm(FromType),
+                      getI32Imm(FromTypeWidth), Base, Offset, Chain };
     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8);
-  } else if (Subtarget.is64Bit()?
-             SelectADDRri64(Op1.getNode(), Op1, Base, Offset):
-             SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
+  } else if (Subtarget.is64Bit()
+                 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
+                 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
     if (Subtarget.is64Bit()) {
       switch (N->getOpcode()) {
-      default: return NULL;
+      default:
+        return NULL;
       case NVPTXISD::LoadV2:
         switch (EltVT.getSimpleVT().SimpleTy) {
-        default: return NULL;
-        case MVT::i8:   Opcode = NVPTX::LDV_i8_v2_ari_64; break;
-        case MVT::i16:  Opcode = NVPTX::LDV_i16_v2_ari_64; break;
-        case MVT::i32:  Opcode = NVPTX::LDV_i32_v2_ari_64; break;
-        case MVT::i64:  Opcode = NVPTX::LDV_i64_v2_ari_64; break;
-        case MVT::f32:  Opcode = NVPTX::LDV_f32_v2_ari_64; break;
-        case MVT::f64:  Opcode = NVPTX::LDV_f64_v2_ari_64; break;
+        default:
+          return NULL;
+        case MVT::i8:
+          Opcode = NVPTX::LDV_i8_v2_ari_64;
+          break;
+        case MVT::i16:
+          Opcode = NVPTX::LDV_i16_v2_ari_64;
+          break;
+        case MVT::i32:
+          Opcode = NVPTX::LDV_i32_v2_ari_64;
+          break;
+        case MVT::i64:
+          Opcode = NVPTX::LDV_i64_v2_ari_64;
+          break;
+        case MVT::f32:
+          Opcode = NVPTX::LDV_f32_v2_ari_64;
+          break;
+        case MVT::f64:
+          Opcode = NVPTX::LDV_f64_v2_ari_64;
+          break;
         }
         break;
       case NVPTXISD::LoadV4:
         switch (EltVT.getSimpleVT().SimpleTy) {
-        default: return NULL;
-        case MVT::i8:   Opcode = NVPTX::LDV_i8_v4_ari_64; break;
-        case MVT::i16:  Opcode = NVPTX::LDV_i16_v4_ari_64; break;
-        case MVT::i32:  Opcode = NVPTX::LDV_i32_v4_ari_64; break;
-        case MVT::f32:  Opcode = NVPTX::LDV_f32_v4_ari_64; break;
+        default:
+          return NULL;
+        case MVT::i8:
+          Opcode = NVPTX::LDV_i8_v4_ari_64;
+          break;
+        case MVT::i16:
+          Opcode = NVPTX::LDV_i16_v4_ari_64;
+          break;
+        case MVT::i32:
+          Opcode = NVPTX::LDV_i32_v4_ari_64;
+          break;
+        case MVT::f32:
+          Opcode = NVPTX::LDV_f32_v4_ari_64;
+          break;
         }
         break;
       }
     } else {
       switch (N->getOpcode()) {
-      default: return NULL;
+      default:
+        return NULL;
       case NVPTXISD::LoadV2:
         switch (EltVT.getSimpleVT().SimpleTy) {
-        default: return NULL;
-        case MVT::i8:   Opcode = NVPTX::LDV_i8_v2_ari; break;
-        case MVT::i16:  Opcode = NVPTX::LDV_i16_v2_ari; break;
-        case MVT::i32:  Opcode = NVPTX::LDV_i32_v2_ari; break;
-        case MVT::i64:  Opcode = NVPTX::LDV_i64_v2_ari; break;
-        case MVT::f32:  Opcode = NVPTX::LDV_f32_v2_ari; break;
-        case MVT::f64:  Opcode = NVPTX::LDV_f64_v2_ari; break;
+        default:
+          return NULL;
+        case MVT::i8:
+          Opcode = NVPTX::LDV_i8_v2_ari;
+          break;
+        case MVT::i16:
+          Opcode = NVPTX::LDV_i16_v2_ari;
+          break;
+        case MVT::i32:
+          Opcode = NVPTX::LDV_i32_v2_ari;
+          break;
+        case MVT::i64:
+          Opcode = NVPTX::LDV_i64_v2_ari;
+          break;
+        case MVT::f32:
+          Opcode = NVPTX::LDV_f32_v2_ari;
+          break;
+        case MVT::f64:
+          Opcode = NVPTX::LDV_f64_v2_ari;
+          break;
         }
         break;
       case NVPTXISD::LoadV4:
         switch (EltVT.getSimpleVT().SimpleTy) {
-        default: return NULL;
-        case MVT::i8:   Opcode = NVPTX::LDV_i8_v4_ari; break;
-        case MVT::i16:  Opcode = NVPTX::LDV_i16_v4_ari; break;
-        case MVT::i32:  Opcode = NVPTX::LDV_i32_v4_ari; break;
-        case MVT::f32:  Opcode = NVPTX::LDV_f32_v4_ari; break;
+        default:
+          return NULL;
+        case MVT::i8:
+          Opcode = NVPTX::LDV_i8_v4_ari;
+          break;
+        case MVT::i16:
+          Opcode = NVPTX::LDV_i16_v4_ari;
+          break;
+        case MVT::i32:
+          Opcode = NVPTX::LDV_i32_v4_ari;
+          break;
+        case MVT::f32:
+          Opcode = NVPTX::LDV_f32_v4_ari;
+          break;
         }
         break;
       }
     }
 
-    SDValue Ops[] = { getI32Imm(IsVolatile),
-                      getI32Imm(CodeAddrSpace),
-                      getI32Imm(VecType),
-                      getI32Imm(FromType),
-                      getI32Imm(FromTypeWidth),
-                      Base, Offset, Chain };
+    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
+                      getI32Imm(VecType), getI32Imm(FromType),
+                      getI32Imm(FromTypeWidth), Base, Offset, Chain };
 
     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 8);
   } else {
     if (Subtarget.is64Bit()) {
       switch (N->getOpcode()) {
-      default: return NULL;
+      default:
+        return NULL;
       case NVPTXISD::LoadV2:
         switch (EltVT.getSimpleVT().SimpleTy) {
-        default: return NULL;
-        case MVT::i8:   Opcode = NVPTX::LDV_i8_v2_areg_64; break;
-        case MVT::i16:  Opcode = NVPTX::LDV_i16_v2_areg_64; break;
-        case MVT::i32:  Opcode = NVPTX::LDV_i32_v2_areg_64; break;
-        case MVT::i64:  Opcode = NVPTX::LDV_i64_v2_areg_64; break;
-        case MVT::f32:  Opcode = NVPTX::LDV_f32_v2_areg_64; break;
-        case MVT::f64:  Opcode = NVPTX::LDV_f64_v2_areg_64; break;
+        default:
+          return NULL;
+        case MVT::i8:
+          Opcode = NVPTX::LDV_i8_v2_areg_64;
+          break;
+        case MVT::i16:
+          Opcode = NVPTX::LDV_i16_v2_areg_64;
+          break;
+        case MVT::i32:
+          Opcode = NVPTX::LDV_i32_v2_areg_64;
+          break;
+        case MVT::i64:
+          Opcode = NVPTX::LDV_i64_v2_areg_64;
+          break;
+        case MVT::f32:
+          Opcode = NVPTX::LDV_f32_v2_areg_64;
+          break;
+        case MVT::f64:
+          Opcode = NVPTX::LDV_f64_v2_areg_64;
+          break;
         }
         break;
       case NVPTXISD::LoadV4:
         switch (EltVT.getSimpleVT().SimpleTy) {
-        default: return NULL;
-        case MVT::i8:   Opcode = NVPTX::LDV_i8_v4_areg_64; break;
-        case MVT::i16:  Opcode = NVPTX::LDV_i16_v4_areg_64; break;
-        case MVT::i32:  Opcode = NVPTX::LDV_i32_v4_areg_64; break;
-        case MVT::f32:  Opcode = NVPTX::LDV_f32_v4_areg_64; break;
+        default:
+          return NULL;
+        case MVT::i8:
+          Opcode = NVPTX::LDV_i8_v4_areg_64;
+          break;
+        case MVT::i16:
+          Opcode = NVPTX::LDV_i16_v4_areg_64;
+          break;
+        case MVT::i32:
+          Opcode = NVPTX::LDV_i32_v4_areg_64;
+          break;
+        case MVT::f32:
+          Opcode = NVPTX::LDV_f32_v4_areg_64;
+          break;
         }
         break;
       }
     } else {
       switch (N->getOpcode()) {
-      default: return NULL;
+      default:
+        return NULL;
       case NVPTXISD::LoadV2:
         switch (EltVT.getSimpleVT().SimpleTy) {
-        default: return NULL;
-        case MVT::i8:   Opcode = NVPTX::LDV_i8_v2_areg; break;
-        case MVT::i16:  Opcode = NVPTX::LDV_i16_v2_areg; break;
-        case MVT::i32:  Opcode = NVPTX::LDV_i32_v2_areg; break;
-        case MVT::i64:  Opcode = NVPTX::LDV_i64_v2_areg; break;
-        case MVT::f32:  Opcode = NVPTX::LDV_f32_v2_areg; break;
-        case MVT::f64:  Opcode = NVPTX::LDV_f64_v2_areg; break;
+        default:
+          return NULL;
+        case MVT::i8:
+          Opcode = NVPTX::LDV_i8_v2_areg;
+          break;
+        case MVT::i16:
+          Opcode = NVPTX::LDV_i16_v2_areg;
+          break;
+        case MVT::i32:
+          Opcode = NVPTX::LDV_i32_v2_areg;
+          break;
+        case MVT::i64:
+          Opcode = NVPTX::LDV_i64_v2_areg;
+          break;
+        case MVT::f32:
+          Opcode = NVPTX::LDV_f32_v2_areg;
+          break;
+        case MVT::f64:
+          Opcode = NVPTX::LDV_f64_v2_areg;
+          break;
         }
         break;
       case NVPTXISD::LoadV4:
         switch (EltVT.getSimpleVT().SimpleTy) {
-        default: return NULL;
-        case MVT::i8:   Opcode = NVPTX::LDV_i8_v4_areg; break;
-        case MVT::i16:  Opcode = NVPTX::LDV_i16_v4_areg; break;
-        case MVT::i32:  Opcode = NVPTX::LDV_i32_v4_areg; break;
-        case MVT::f32:  Opcode = NVPTX::LDV_f32_v4_areg; break;
+        default:
+          return NULL;
+        case MVT::i8:
+          Opcode = NVPTX::LDV_i8_v4_areg;
+          break;
+        case MVT::i16:
+          Opcode = NVPTX::LDV_i16_v4_areg;
+          break;
+        case MVT::i32:
+          Opcode = NVPTX::LDV_i32_v4_areg;
+          break;
+        case MVT::f32:
+          Opcode = NVPTX::LDV_f32_v4_areg;
+          break;
         }
         break;
       }
     }
 
-    SDValue Ops[] = { getI32Imm(IsVolatile),
-                      getI32Imm(CodeAddrSpace),
-                      getI32Imm(VecType),
-                      getI32Imm(FromType),
-                      getI32Imm(FromTypeWidth),
-                      Op1, Chain };
+    SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
+                      getI32Imm(VecType), getI32Imm(FromType),
+                      getI32Imm(FromTypeWidth), Op1, Chain };
     LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops, 7);
   }
 
@@ -598,89 +783,179 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
   // Select opcode
   if (Subtarget.is64Bit()) {
     switch (N->getOpcode()) {
-    default: return NULL;
+    default:
+      return NULL;
     case NVPTXISD::LDGV2:
       switch (RetVT.getSimpleVT().SimpleTy) {
-      default: return NULL;
-      case MVT::i8:   Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_64; break;
-      case MVT::i16:  Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_64; break;
-      case MVT::i32:  Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_64; break;
-      case MVT::i64:  Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_64; break;
-      case MVT::f32:  Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_64; break;
-      case MVT::f64:  Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_64; break;
+      default:
+        return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_64;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_64;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_64;
+        break;
+      case MVT::i64:
+        Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_64;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_64;
+        break;
+      case MVT::f64:
+        Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_64;
+        break;
       }
       break;
     case NVPTXISD::LDGV4:
       switch (RetVT.getSimpleVT().SimpleTy) {
-      default: return NULL;
-      case MVT::i8:   Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_64; break;
-      case MVT::i16:  Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_64; break;
-      case MVT::i32:  Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_64; break;
-      case MVT::f32:  Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_64; break;
+      default:
+        return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_64;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_64;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_64;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_64;
+        break;
       }
       break;
     case NVPTXISD::LDUV2:
       switch (RetVT.getSimpleVT().SimpleTy) {
-      default: return NULL;
-      case MVT::i8:   Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_64; break;
-      case MVT::i16:  Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_64; break;
-      case MVT::i32:  Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_64; break;
-      case MVT::i64:  Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_64; break;
-      case MVT::f32:  Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_64; break;
-      case MVT::f64:  Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_64; break;
+      default:
+        return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_64;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_64;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_64;
+        break;
+      case MVT::i64:
+        Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_64;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_64;
+        break;
+      case MVT::f64:
+        Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_64;
+        break;
       }
       break;
     case NVPTXISD::LDUV4:
       switch (RetVT.getSimpleVT().SimpleTy) {
-      default: return NULL;
-      case MVT::i8:   Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_64; break;
-      case MVT::i16:  Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_64; break;
-      case MVT::i32:  Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_64; break;
-      case MVT::f32:  Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_64; break;
+      default:
+        return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_64;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_64;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_64;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_64;
+        break;
       }
       break;
     }
   } else {
     switch (N->getOpcode()) {
-    default: return NULL;
+    default:
+      return NULL;
     case NVPTXISD::LDGV2:
       switch (RetVT.getSimpleVT().SimpleTy) {
-      default: return NULL;
-      case MVT::i8:   Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_32; break;
-      case MVT::i16:  Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_32; break;
-      case MVT::i32:  Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_32; break;
-      case MVT::i64:  Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_32; break;
-      case MVT::f32:  Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_32; break;
-      case MVT::f64:  Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_32; break;
+      default:
+        return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_32;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_32;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_32;
+        break;
+      case MVT::i64:
+        Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_32;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_32;
+        break;
+      case MVT::f64:
+        Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_32;
+        break;
       }
       break;
     case NVPTXISD::LDGV4:
       switch (RetVT.getSimpleVT().SimpleTy) {
-      default: return NULL;
-      case MVT::i8:   Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_32; break;
-      case MVT::i16:  Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_32; break;
-      case MVT::i32:  Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_32; break;
-      case MVT::f32:  Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_32; break;
+      default:
+        return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_32;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_32;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_32;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_32;
+        break;
       }
       break;
     case NVPTXISD::LDUV2:
       switch (RetVT.getSimpleVT().SimpleTy) {
-      default: return NULL;
-      case MVT::i8:   Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_32; break;
-      case MVT::i16:  Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_32; break;
-      case MVT::i32:  Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_32; break;
-      case MVT::i64:  Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_32; break;
-      case MVT::f32:  Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_32; break;
-      case MVT::f64:  Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_32; break;
+      default:
+        return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_32;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_32;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_32;
+        break;
+      case MVT::i64:
+        Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_32;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_32;
+        break;
+      case MVT::f64:
+        Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_32;
+        break;
       }
       break;
     case NVPTXISD::LDUV4:
       switch (RetVT.getSimpleVT().SimpleTy) {
-      default: return NULL;
-      case MVT::i8:   Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_32; break;
-      case MVT::i16:  Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_32; break;
-      case MVT::i32:  Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_32; break;
-      case MVT::f32:  Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_32; break;
+      default:
+        return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_32;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_32;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_32;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_32;
+        break;
       }
       break;
     }
@@ -696,8 +971,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
   return LD;
 }
 
-
-SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
+SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
   DebugLoc dl = N->getDebugLoc();
   StoreSDNode *ST = cast<StoreSDNode>(N);
   EVT StoreVT = ST->getMemoryVT();
@@ -738,7 +1012,7 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
   // - for integer type, always use 'u'
   //
   MVT ScalarVT = SimpleVT.getScalarType();
-  unsigned toTypeWidth =  ScalarVT.getSizeInBits();
+  unsigned toTypeWidth = ScalarVT.getSizeInBits();
   unsigned int toType;
   if (ScalarVT.isFloatingPoint())
     toType = NVPTX::PTXLdStInstCode::Float;
@@ -757,108 +1031,166 @@ SDNode* NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
 
   if (SelectDirectAddr(N2, Addr)) {
     switch (SourceVT) {
-    case MVT::i8:    Opcode = NVPTX::ST_i8_avar; break;
-    case MVT::i16:   Opcode = NVPTX::ST_i16_avar; break;
-    case MVT::i32:   Opcode = NVPTX::ST_i32_avar; break;
-    case MVT::i64:   Opcode = NVPTX::ST_i64_avar; break;
-    case MVT::f32:   Opcode = NVPTX::ST_f32_avar; break;
-    case MVT::f64:   Opcode = NVPTX::ST_f64_avar; break;
-    default: return NULL;
+    case MVT::i8:
+      Opcode = NVPTX::ST_i8_avar;
+      break;
+    case MVT::i16:
+      Opcode = NVPTX::ST_i16_avar;
+      break;
+    case MVT::i32:
+      Opcode = NVPTX::ST_i32_avar;
+      break;
+    case MVT::i64:
+      Opcode = NVPTX::ST_i64_avar;
+      break;
+    case MVT::f32:
+      Opcode = NVPTX::ST_f32_avar;
+      break;
+    case MVT::f64:
+      Opcode = NVPTX::ST_f64_avar;
+      break;
+    default:
+      return NULL;
     }
-    SDValue Ops[] = { N1,
-                      getI32Imm(isVolatile),
-                      getI32Imm(codeAddrSpace),
-                      getI32Imm(vecType),
-                      getI32Imm(toType),
-                      getI32Imm(toTypeWidth),
-                      Addr, Chain };
-    NVPTXST = CurDAG->getMachineNode(Opcode, dl,
-                                     MVT::Other, Ops, 8);
-  } else if (Subtarget.is64Bit()?
-      SelectADDRsi64(N2.getNode(), N2, Base, Offset):
-      SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
+    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+                      getI32Imm(vecType), getI32Imm(toType),
+                      getI32Imm(toTypeWidth), Addr, Chain };
+    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 8);
+  } else if (Subtarget.is64Bit()
+                 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
+                 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
     switch (SourceVT) {
-    case MVT::i8:    Opcode = NVPTX::ST_i8_asi; break;
-    case MVT::i16:   Opcode = NVPTX::ST_i16_asi; break;
-    case MVT::i32:   Opcode = NVPTX::ST_i32_asi; break;
-    case MVT::i64:   Opcode = NVPTX::ST_i64_asi; break;
-    case MVT::f32:   Opcode = NVPTX::ST_f32_asi; break;
-    case MVT::f64:   Opcode = NVPTX::ST_f64_asi; break;
-    default: return NULL;
+    case MVT::i8:
+      Opcode = NVPTX::ST_i8_asi;
+      break;
+    case MVT::i16:
+      Opcode = NVPTX::ST_i16_asi;
+      break;
+    case MVT::i32:
+      Opcode = NVPTX::ST_i32_asi;
+      break;
+    case MVT::i64:
+      Opcode = NVPTX::ST_i64_asi;
+      break;
+    case MVT::f32:
+      Opcode = NVPTX::ST_f32_asi;
+      break;
+    case MVT::f64:
+      Opcode = NVPTX::ST_f64_asi;
+      break;
+    default:
+      return NULL;
     }
-    SDValue Ops[] = { N1,
-                      getI32Imm(isVolatile),
-                      getI32Imm(codeAddrSpace),
-                      getI32Imm(vecType),
-                      getI32Imm(toType),
-                      getI32Imm(toTypeWidth),
-                      Base, Offset, Chain };
-    NVPTXST = CurDAG->getMachineNode(Opcode, dl,
-                                     MVT::Other, Ops, 9);
-  } else if (Subtarget.is64Bit()?
-      SelectADDRri64(N2.getNode(), N2, Base, Offset):
-      SelectADDRri(N2.getNode(), N2, Base, Offset)) {
+    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+                      getI32Imm(vecType), getI32Imm(toType),
+                      getI32Imm(toTypeWidth), Base, Offset, Chain };
+    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 9);
+  } else if (Subtarget.is64Bit()
+                 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
+                 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
     if (Subtarget.is64Bit()) {
       switch (SourceVT) {
-      case MVT::i8:    Opcode = NVPTX::ST_i8_ari_64; break;
-      case MVT::i16:   Opcode = NVPTX::ST_i16_ari_64; break;
-      case MVT::i32:   Opcode = NVPTX::ST_i32_ari_64; break;
-      case MVT::i64:   Opcode = NVPTX::ST_i64_ari_64; break;
-      case MVT::f32:   Opcode = NVPTX::ST_f32_ari_64; break;
-      case MVT::f64:   Opcode = NVPTX::ST_f64_ari_64; break;
-      default: return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::ST_i8_ari_64;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::ST_i16_ari_64;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::ST_i32_ari_64;
+        break;
+      case MVT::i64:
+        Opcode = NVPTX::ST_i64_ari_64;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::ST_f32_ari_64;
+        break;
+      case MVT::f64:
+        Opcode = NVPTX::ST_f64_ari_64;
+        break;
+      default:
+        return NULL;
       }
     } else {
       switch (SourceVT) {
-      case MVT::i8:    Opcode = NVPTX::ST_i8_ari; break;
-      case MVT::i16:   Opcode = NVPTX::ST_i16_ari; break;
-      case MVT::i32:   Opcode = NVPTX::ST_i32_ari; break;
-      case MVT::i64:   Opcode = NVPTX::ST_i64_ari; break;
-      case MVT::f32:   Opcode = NVPTX::ST_f32_ari; break;
-      case MVT::f64:   Opcode = NVPTX::ST_f64_ari; break;
-      default: return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::ST_i8_ari;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::ST_i16_ari;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::ST_i32_ari;
+        break;
+      case MVT::i64:
+        Opcode = NVPTX::ST_i64_ari;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::ST_f32_ari;
+        break;
+      case MVT::f64:
+        Opcode = NVPTX::ST_f64_ari;
+        break;
+      default:
+        return NULL;
       }
     }
-    SDValue Ops[] = { N1,
-                      getI32Imm(isVolatile),
-                      getI32Imm(codeAddrSpace),
-                      getI32Imm(vecType),
-                      getI32Imm(toType),
-                      getI32Imm(toTypeWidth),
-                      Base, Offset, Chain };
-    NVPTXST = CurDAG->getMachineNode(Opcode, dl,
-                                     MVT::Other, Ops, 9);
+    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+                      getI32Imm(vecType), getI32Imm(toType),
+                      getI32Imm(toTypeWidth), Base, Offset, Chain };
+    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 9);
   } else {
     if (Subtarget.is64Bit()) {
       switch (SourceVT) {
-      case MVT::i8:    Opcode = NVPTX::ST_i8_areg_64; break;
-      case MVT::i16:   Opcode = NVPTX::ST_i16_areg_64; break;
-      case MVT::i32:   Opcode = NVPTX::ST_i32_areg_64; break;
-      case MVT::i64:   Opcode = NVPTX::ST_i64_areg_64; break;
-      case MVT::f32:   Opcode = NVPTX::ST_f32_areg_64; break;
-      case MVT::f64:   Opcode = NVPTX::ST_f64_areg_64; break;
-      default: return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::ST_i8_areg_64;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::ST_i16_areg_64;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::ST_i32_areg_64;
+        break;
+      case MVT::i64:
+        Opcode = NVPTX::ST_i64_areg_64;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::ST_f32_areg_64;
+        break;
+      case MVT::f64:
+        Opcode = NVPTX::ST_f64_areg_64;
+        break;
+      default:
+        return NULL;
       }
     } else {
       switch (SourceVT) {
-      case MVT::i8:    Opcode = NVPTX::ST_i8_areg; break;
-      case MVT::i16:   Opcode = NVPTX::ST_i16_areg; break;
-      case MVT::i32:   Opcode = NVPTX::ST_i32_areg; break;
-      case MVT::i64:   Opcode = NVPTX::ST_i64_areg; break;
-      case MVT::f32:   Opcode = NVPTX::ST_f32_areg; break;
-      case MVT::f64:   Opcode = NVPTX::ST_f64_areg; break;
-      default: return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::ST_i8_areg;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::ST_i16_areg;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::ST_i32_areg;
+        break;
+      case MVT::i64:
+        Opcode = NVPTX::ST_i64_areg;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::ST_f32_areg;
+        break;
+      case MVT::f64:
+        Opcode = NVPTX::ST_f64_areg;
+        break;
+      default:
+        return NULL;
       }
     }
-    SDValue Ops[] = { N1,
-                      getI32Imm(isVolatile),
-                      getI32Imm(codeAddrSpace),
-                      getI32Imm(vecType),
-                      getI32Imm(toType),
-                      getI32Imm(toTypeWidth),
-                      N2, Chain };
-    NVPTXST = CurDAG->getMachineNode(Opcode, dl,
-                                     MVT::Other, Ops, 8);
+    SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
+                      getI32Imm(vecType), getI32Imm(toType),
+                      getI32Imm(toTypeWidth), N2, Chain };
+    NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops, 8);
   }
 
   if (NVPTXST != NULL) {
@@ -901,14 +1233,13 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
   // - for integer type, always use 'u'
   assert(StoreVT.isSimple() && "Store value is not simple");
   MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
-  unsigned ToTypeWidth =  ScalarVT.getSizeInBits();
+  unsigned ToTypeWidth = ScalarVT.getSizeInBits();
   unsigned ToType;
   if (ScalarVT.isFloatingPoint())
     ToType = NVPTX::PTXLdStInstCode::Float;
   else
     ToType = NVPTX::PTXLdStInstCode::Unsigned;
 
-
   SmallVector<SDValue, 12> StOps;
   SDValue N2;
   unsigned VecType;
@@ -928,7 +1259,8 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
     StOps.push_back(N->getOperand(4));
     N2 = N->getOperand(5);
     break;
-  default: return NULL;
+  default:
+    return NULL;
   }
 
   StOps.push_back(getI32Imm(IsVolatile));
@@ -939,105 +1271,197 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
 
   if (SelectDirectAddr(N2, Addr)) {
     switch (N->getOpcode()) {
-    default: return NULL;
+    default:
+      return NULL;
     case NVPTXISD::StoreV2:
       switch (EltVT.getSimpleVT().SimpleTy) {
-      default: return NULL;
-      case MVT::i8:   Opcode = NVPTX::STV_i8_v2_avar; break;
-      case MVT::i16:  Opcode = NVPTX::STV_i16_v2_avar; break;
-      case MVT::i32:  Opcode = NVPTX::STV_i32_v2_avar; break;
-      case MVT::i64:  Opcode = NVPTX::STV_i64_v2_avar; break;
-      case MVT::f32:  Opcode = NVPTX::STV_f32_v2_avar; break;
-      case MVT::f64:  Opcode = NVPTX::STV_f64_v2_avar; break;
+      default:
+        return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::STV_i8_v2_avar;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::STV_i16_v2_avar;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::STV_i32_v2_avar;
+        break;
+      case MVT::i64:
+        Opcode = NVPTX::STV_i64_v2_avar;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::STV_f32_v2_avar;
+        break;
+      case MVT::f64:
+        Opcode = NVPTX::STV_f64_v2_avar;
+        break;
       }
       break;
     case NVPTXISD::StoreV4:
       switch (EltVT.getSimpleVT().SimpleTy) {
-      default: return NULL;
-      case MVT::i8:   Opcode = NVPTX::STV_i8_v4_avar; break;
-      case MVT::i16:  Opcode = NVPTX::STV_i16_v4_avar; break;
-      case MVT::i32:  Opcode = NVPTX::STV_i32_v4_avar; break;
-      case MVT::f32:  Opcode = NVPTX::STV_f32_v4_avar; break;
+      default:
+        return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::STV_i8_v4_avar;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::STV_i16_v4_avar;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::STV_i32_v4_avar;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::STV_f32_v4_avar;
+        break;
       }
       break;
     }
     StOps.push_back(Addr);
-  } else if (Subtarget.is64Bit()?
-             SelectADDRsi64(N2.getNode(), N2, Base, Offset):
-             SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
+  } else if (Subtarget.is64Bit()
+                 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
+                 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
     switch (N->getOpcode()) {
-    default: return NULL;
+    default:
+      return NULL;
     case NVPTXISD::StoreV2:
       switch (EltVT.getSimpleVT().SimpleTy) {
-      default: return NULL;
-      case MVT::i8:   Opcode = NVPTX::STV_i8_v2_asi; break;
-      case MVT::i16:  Opcode = NVPTX::STV_i16_v2_asi; break;
-      case MVT::i32:  Opcode = NVPTX::STV_i32_v2_asi; break;
-      case MVT::i64:  Opcode = NVPTX::STV_i64_v2_asi; break;
-      case MVT::f32:  Opcode = NVPTX::STV_f32_v2_asi; break;
-      case MVT::f64:  Opcode = NVPTX::STV_f64_v2_asi; break;
+      default:
+        return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::STV_i8_v2_asi;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::STV_i16_v2_asi;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::STV_i32_v2_asi;
+        break;
+      case MVT::i64:
+        Opcode = NVPTX::STV_i64_v2_asi;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::STV_f32_v2_asi;
+        break;
+      case MVT::f64:
+        Opcode = NVPTX::STV_f64_v2_asi;
+        break;
       }
       break;
     case NVPTXISD::StoreV4:
       switch (EltVT.getSimpleVT().SimpleTy) {
-      default: return NULL;
-      case MVT::i8:   Opcode = NVPTX::STV_i8_v4_asi; break;
-      case MVT::i16:  Opcode = NVPTX::STV_i16_v4_asi; break;
-      case MVT::i32:  Opcode = NVPTX::STV_i32_v4_asi; break;
-      case MVT::f32:  Opcode = NVPTX::STV_f32_v4_asi; break;
+      default:
+        return NULL;
+      case MVT::i8:
+        Opcode = NVPTX::STV_i8_v4_asi;
+        break;
+      case MVT::i16:
+        Opcode = NVPTX::STV_i16_v4_asi;
+        break;
+      case MVT::i32:
+        Opcode = NVPTX::STV_i32_v4_asi;
+        break;
+      case MVT::f32:
+        Opcode = NVPTX::STV_f32_v4_asi;
+        break;
       }
       break;
     }
     StOps.push_back(Base);
     StOps.push_back(Offset);
-  } else if (Subtarget.is64Bit()?
-             SelectADDRri64(N2.getNode(), N2, Base, Offset):
-             SelectADDRri(N2.getNode(), N2, Base, Offset)) {
+  } else if (Subtarget.is64Bit()
+                 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
+                 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
     if (Subtarget.is64Bit()) {
       switch (N->getOpcode()) {
-      default: return NULL;
+      default:
+        return NULL;
       case NVPTXISD::StoreV2:
         switch (EltVT.getSimpleVT().SimpleTy) {
-        default: return NULL;
-        case MVT::i8:   Opcode = NVPTX::STV_i8_v2_ari_64; break;
-        case MVT::i16:  Opcode = NVPTX::STV_i16_v2_ari_64; break;
-        case MVT::i32:  Opcode = NVPTX::STV_i32_v2_ari_64; break;
-        case MVT::i64:  Opcode = NVPTX::STV_i64_v2_ari_64; break;
-        case MVT::f32:  Opcode = NVPTX::STV_f32_v2_ari_64; break;
-        case MVT::f64:  Opcode = NVPTX::STV_f64_v2_ari_64; break;
+        default:
+          return NULL;
+        case MVT::i8:
+          Opcode = NVPTX::STV_i8_v2_ari_64;
+          break;
+        case MVT::i16:
+          Opcode = NVPTX::STV_i16_v2_ari_64;
+          break;
+        case MVT::i32:
+          Opcode = NVPTX::STV_i32_v2_ari_64;
+          break;
+        case MVT::i64:
+          Opcode = NVPTX::STV_i64_v2_ari_64;
+          break;
+        case MVT::f32:
+          Opcode = NVPTX::STV_f32_v2_ari_64;
+          break;
+        case MVT::f64:
+          Opcode = NVPTX::STV_f64_v2_ari_64;
+          break;
         }
         break;
       case NVPTXISD::StoreV4:
         switch (EltVT.getSimpleVT().SimpleTy) {
-        default: return NULL;
-        case MVT::i8:   Opcode = NVPTX::STV_i8_v4_ari_64; break;
-        case MVT::i16:  Opcode = NVPTX::STV_i16_v4_ari_64; break;
-        case MVT::i32:  Opcode = NVPTX::STV_i32_v4_ari_64; break;
-        case MVT::f32:  Opcode = NVPTX::STV_f32_v4_ari_64; break;
+        default:
+          return NULL;
+        case MVT::i8:
+          Opcode = NVPTX::STV_i8_v4_ari_64;
+          break;
+        case MVT::i16:
+          Opcode = NVPTX::STV_i16_v4_ari_64;
+          break;
+        case MVT::i32:
+          Opcode = NVPTX::STV_i32_v4_ari_64;
+          break;
+        case MVT::f32:
+          Opcode = NVPTX::STV_f32_v4_ari_64;
+          break;
         }
         break;
       }
     } else {
       switch (N->getOpcode()) {
-      default: return NULL;
+      default:
+        return NULL;
       case NVPTXISD::StoreV2:
         switch (EltVT.getSimpleVT().SimpleTy) {
-        default: return NULL;
-        case MVT::i8:   Opcode = NVPTX::STV_i8_v2_ari; break;
-        case MVT::i16:  Opcode = NVPTX::STV_i16_v2_ari; break;
-        case MVT::i32:  Opcode = NVPTX::STV_i32_v2_ari; break;
-        case MVT::i64:  Opcode = NVPTX::STV_i64_v2_ari; break;
-        case MVT::f32:  Opcode = NVPTX::STV_f32_v2_ari; break;
-        case MVT::f64:  Opcode = NVPTX::STV_f64_v2_ari; break;
+        default:
+          return NULL;
+        case MVT::i8:
+          Opcode = NVPTX::STV_i8_v2_ari;
+          break;
+        case MVT::i16:
+          Opcode = NVPTX::STV_i16_v2_ari;
+          break;
+        case MVT::i32:
+          Opcode = NVPTX::STV_i32_v2_ari;
+          break;
+        case MVT::i64:
+          Opcode = NVPTX::STV_i64_v2_ari;
+          break;
+        case MVT::f32:
+          Opcode = NVPTX::STV_f32_v2_ari;
+          break;
+        case MVT::f64:
+          Opcode = NVPTX::STV_f64_v2_ari;
+          break;
         }
         break;
       case NVPTXISD::StoreV4:
         switch (EltVT.getSimpleVT().SimpleTy) {
-        default: return NULL;
-        case MVT::i8:   Opcode = NVPTX::STV_i8_v4_ari; break;
-        case MVT::i16:  Opcode = NVPTX::STV_i16_v4_ari; break;
-        case MVT::i32:  Opcode = NVPTX::STV_i32_v4_ari; break;
-        case MVT::f32:  Opcode = NVPTX::STV_f32_v4_ari; break;
+        default:
+          return NULL;
+        case MVT::i8:
+          Opcode = NVPTX::STV_i8_v4_ari;
+          break;
+        case MVT::i16:
+          Opcode = NVPTX::STV_i16_v4_ari;
+          break;
+        case MVT::i32:
+          Opcode = NVPTX::STV_i32_v4_ari;
+          break;
+        case MVT::f32:
+          Opcode = NVPTX::STV_f32_v4_ari;
+          break;
         }
         break;
       }
@@ -1047,49 +1471,95 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
   } else {
     if (Subtarget.is64Bit()) {
       switch (N->getOpcode()) {
-      default: return NULL;
+      default:
+        return NULL;
       case NVPTXISD::StoreV2:
         switch (EltVT.getSimpleVT().SimpleTy) {
-        default: return NULL;
-        case MVT::i8:   Opcode = NVPTX::STV_i8_v2_areg_64; break;
-        case MVT::i16:  Opcode = NVPTX::STV_i16_v2_areg_64; break;
-        case MVT::i32:  Opcode = NVPTX::STV_i32_v2_areg_64; break;
-        case MVT::i64:  Opcode = NVPTX::STV_i64_v2_areg_64; break;
-        case MVT::f32:  Opcode = NVPTX::STV_f32_v2_areg_64; break;
-        case MVT::f64:  Opcode = NVPTX::STV_f64_v2_areg_64; break;
+        default:
+          return NULL;
+        case MVT::i8:
+          Opcode = NVPTX::STV_i8_v2_areg_64;
+          break;
+        case MVT::i16:
+          Opcode = NVPTX::STV_i16_v2_areg_64;
+          break;
+        case MVT::i32:
+          Opcode = NVPTX::STV_i32_v2_areg_64;
+          break;
+        case MVT::i64:
+          Opcode = NVPTX::STV_i64_v2_areg_64;
+          break;
+        case MVT::f32:
+          Opcode = NVPTX::STV_f32_v2_areg_64;
+          break;
+        case MVT::f64:
+          Opcode = NVPTX::STV_f64_v2_areg_64;
+          break;
         }
         break;
       case NVPTXISD::StoreV4:
         switch (EltVT.getSimpleVT().SimpleTy) {
-        default: return NULL;
-        case MVT::i8:   Opcode = NVPTX::STV_i8_v4_areg_64; break;
-        case MVT::i16:  Opcode = NVPTX::STV_i16_v4_areg_64; break;
-        case MVT::i32:  Opcode = NVPTX::STV_i32_v4_areg_64; break;
-        case MVT::f32:  Opcode = NVPTX::STV_f32_v4_areg_64; break;
+        default:
+          return NULL;
+        case MVT::i8:
+          Opcode = NVPTX::STV_i8_v4_areg_64;
+          break;
+        case MVT::i16:
+          Opcode = NVPTX::STV_i16_v4_areg_64;
+          break;
+        case MVT::i32:
+          Opcode = NVPTX::STV_i32_v4_areg_64;
+          break;
+        case MVT::f32:
+          Opcode = NVPTX::STV_f32_v4_areg_64;
+          break;
         }
         break;
       }
     } else {
       switch (N->getOpcode()) {
-      default: return NULL;
+      default:
+        return NULL;
       case NVPTXISD::StoreV2:
         switch (EltVT.getSimpleVT().SimpleTy) {
-        default: return NULL;
-        case MVT::i8:   Opcode = NVPTX::STV_i8_v2_areg; break;
-        case MVT::i16:  Opcode = NVPTX::STV_i16_v2_areg; break;
-        case MVT::i32:  Opcode = NVPTX::STV_i32_v2_areg; break;
-        case MVT::i64:  Opcode = NVPTX::STV_i64_v2_areg; break;
-        case MVT::f32:  Opcode = NVPTX::STV_f32_v2_areg; break;
-        case MVT::f64:  Opcode = NVPTX::STV_f64_v2_areg; break;
+        default:
+          return NULL;
+        case MVT::i8:
+          Opcode = NVPTX::STV_i8_v2_areg;
+          break;
+        case MVT::i16:
+          Opcode = NVPTX::STV_i16_v2_areg;
+          break;
+        case MVT::i32:
+          Opcode = NVPTX::STV_i32_v2_areg;
+          break;
+        case MVT::i64:
+          Opcode = NVPTX::STV_i64_v2_areg;
+          break;
+        case MVT::f32:
+          Opcode = NVPTX::STV_f32_v2_areg;
+          break;
+        case MVT::f64:
+          Opcode = NVPTX::STV_f64_v2_areg;
+          break;
         }
         break;
       case NVPTXISD::StoreV4:
         switch (EltVT.getSimpleVT().SimpleTy) {
-        default: return NULL;
-        case MVT::i8:   Opcode = NVPTX::STV_i8_v4_areg; break;
-        case MVT::i16:  Opcode = NVPTX::STV_i16_v4_areg; break;
-        case MVT::i32:  Opcode = NVPTX::STV_i32_v4_areg; break;
-        case MVT::f32:  Opcode = NVPTX::STV_f32_v4_areg; break;
+        default:
+          return NULL;
+        case MVT::i8:
+          Opcode = NVPTX::STV_i8_v4_areg;
+          break;
+        case MVT::i16:
+          Opcode = NVPTX::STV_i16_v4_areg;
+          break;
+        case MVT::i32:
+          Opcode = NVPTX::STV_i32_v4_areg;
+          break;
+        case MVT::f32:
+          Opcode = NVPTX::STV_f32_v4_areg;
+          break;
         }
         break;
       }
@@ -1112,8 +1582,8 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
 // A direct address could be a globaladdress or externalsymbol.
 bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
   // Return true if TGA or ES.
-  if (N.getOpcode() == ISD::TargetGlobalAddress
-      || N.getOpcode() == ISD::TargetExternalSymbol) {
+  if (N.getOpcode() == ISD::TargetGlobalAddress ||
+      N.getOpcode() == ISD::TargetExternalSymbol) {
     Address = N;
     return true;
   }
@@ -1131,12 +1601,11 @@ bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
 }
 
 // symbol+offset
-bool NVPTXDAGToDAGISel::SelectADDRsi_imp(SDNode *OpNode, SDValue Addr,
-                                         SDValue &Base, SDValue &Offset,
-                                         MVT mvt) {
+bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
+    SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
   if (Addr.getOpcode() == ISD::ADD) {
     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
-      SDValue base=Addr.getOperand(0);
+      SDValue base = Addr.getOperand(0);
       if (SelectDirectAddr(base, Base)) {
         Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
         return true;
@@ -1159,9 +1628,8 @@ bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
 }
 
 // register+offset
-bool NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr,
-                                         SDValue &Base, SDValue &Offset,
-                                         MVT mvt) {
+bool NVPTXDAGToDAGISel::SelectADDRri_imp(
+    SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
   if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
     Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
     Offset = CurDAG->getTargetConstant(0, mvt);
@@ -1169,7 +1637,7 @@ bool NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr,
   }
   if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
       Addr.getOpcode() == ISD::TargetGlobalAddress)
-    return false;  // direct calls.
+    return false; // direct calls.
 
   if (Addr.getOpcode() == ISD::ADD) {
     if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
@@ -1177,7 +1645,7 @@ bool NVPTXDAGToDAGISel::SelectADDRri_imp(SDNode *OpNode, SDValue Addr,
     }
     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
       if (FrameIndexSDNode *FIN =
-          dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
+              dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
         // Constant offset from frame ref.
         Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
       else
@@ -1209,8 +1677,7 @@ bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
   // (See SelectionDAGNodes.h). So we need to check for both.
   if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
     Src = mN->getSrcValue();
-  }
-  else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
+  } else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
     Src = mN->getSrcValue();
   }
   if (!Src)
@@ -1222,13 +1689,13 @@ bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
 
 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
 /// inline asm expressions.
-bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
-                                                     char ConstraintCode,
-                                                 std::vector<SDValue> &OutOps) {
+bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
+    const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
   SDValue Op0, Op1;
   switch (ConstraintCode) {
-  default: return true;
-  case 'm':   // memory
+  default:
+    return true;
+  case 'm': // memory
     if (SelectDirectAddr(Op, Op0)) {
       OutOps.push_back(Op0);
       OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
@@ -1251,10 +1718,8 @@ bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
 // pattern matcher inserts a bunch of IMOVi8rr to convert
 // the imm to i8imm, and this causes instruction selection
 // to fail.
-bool NVPTXDAGToDAGISel::UndefOrImm(SDValue Op, SDValue N,
-                                   SDValue &Retval) {
-  if (!(N.getOpcode() == ISD::UNDEF) &&
-      !(N.getOpcode() == ISD::Constant))
+bool NVPTXDAGToDAGISel::UndefOrImm(SDValue Op, SDValue N, SDValue &Retval) {
+  if (!(N.getOpcode() == ISD::UNDEF) && !(N.getOpcode() == ISD::Constant))
     return false;
 
   if (N.getOpcode() == ISD::UNDEF)
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 4ec924117a..70e8e46429 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -64,11 +64,10 @@ public:
 
   const NVPTXSubtarget &Subtarget;
 
-  virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
-                                            char ConstraintCode,
-                                            std::vector<SDValue> &OutOps);
+  virtual bool SelectInlineAsmMemoryOperand(
+      const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps);
 private:
-  // Include the pieces autogenerated from the target description.
+// Include the pieces autogenerated from the target description.
 #include "NVPTXGenDAGISel.inc"
 
   SDNode *Select(SDNode *N);
@@ -99,7 +98,6 @@ private:
   bool SelectADDRsi64(SDNode *OpNode, SDValue Addr, SDValue &Base,
                       SDValue &Offset);
 
-
   bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const;
 
   bool UndefOrImm(SDValue Op, SDValue N, SDValue &Retval);
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index e9a9fbfd04..6e01a5a820 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -11,7 +11,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-
 #include "NVPTXISelLowering.h"
 #include "NVPTX.h"
 #include "NVPTXTargetMachine.h"
@@ -44,14 +43,14 @@ using namespace llvm;
 
 static unsigned int uniqueCallSite = 0;
 
-static cl::opt<bool>
-sched4reg("nvptx-sched4reg",
-          cl::desc("NVPTX Specific: schedule for register pressue"),
-          cl::init(false));
+static cl::opt<bool> sched4reg(
+    "nvptx-sched4reg",
+    cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false));
 
 static bool IsPTXVectorType(MVT VT) {
   switch (VT.SimpleTy) {
-  default: return false;
+  default:
+    return false;
   case MVT::v2i8:
   case MVT::v4i8:
   case MVT::v2i16:
@@ -62,22 +61,21 @@ static bool IsPTXVectorType(MVT VT) {
   case MVT::v2f32:
   case MVT::v4f32:
   case MVT::v2f64:
-  return true;
+    return true;
   }
 }
 
 // NVPTXTargetLowering Constructor.
 NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
-: TargetLowering(TM, new NVPTXTargetObjectFile()),
-  nvTM(&TM),
-  nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
+    : TargetLowering(TM, new NVPTXTargetObjectFile()), nvTM(&TM),
+      nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
 
   // always lower memset, memcpy, and memmove intrinsics to load/store
   // instructions, rather
   // then generating calls to memset, mempcy or memmove.
-  MaxStoresPerMemset = (unsigned)0xFFFFFFFF;
-  MaxStoresPerMemcpy = (unsigned)0xFFFFFFFF;
-  MaxStoresPerMemmove = (unsigned)0xFFFFFFFF;
+  MaxStoresPerMemset = (unsigned) 0xFFFFFFFF;
+  MaxStoresPerMemcpy = (unsigned) 0xFFFFFFFF;
+  MaxStoresPerMemmove = (unsigned) 0xFFFFFFFF;
 
   setBooleanContents(ZeroOrNegativeOneBooleanContent);
 
@@ -100,52 +98,50 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
   addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);
 
   // Operations not directly supported by NVPTX.
-  setOperationAction(ISD::SELECT_CC,         MVT::Other, Expand);
-  setOperationAction(ISD::BR_CC,             MVT::f32, Expand);
-  setOperationAction(ISD::BR_CC,             MVT::f64, Expand);
-  setOperationAction(ISD::BR_CC,             MVT::i1,  Expand);
-  setOperationAction(ISD::BR_CC,             MVT::i8,  Expand);
-  setOperationAction(ISD::BR_CC,             MVT::i16, Expand);
-  setOperationAction(ISD::BR_CC,             MVT::i32, Expand);
-  setOperationAction(ISD::BR_CC,             MVT::i64, Expand);
+  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+  setOperationAction(ISD::BR_CC, MVT::f32, Expand);
+  setOperationAction(ISD::BR_CC, MVT::f64, Expand);
+  setOperationAction(ISD::BR_CC, MVT::i1, Expand);
+  setOperationAction(ISD::BR_CC, MVT::i8, Expand);
+  setOperationAction(ISD::BR_CC, MVT::i16, Expand);
+  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
+  setOperationAction(ISD::BR_CC, MVT::i64, Expand);
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Expand);
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
-  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand);
-  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 
   if (nvptxSubtarget.hasROT64()) {
-    setOperationAction(ISD::ROTL , MVT::i64, Legal);
-    setOperationAction(ISD::ROTR , MVT::i64, Legal);
-  }
-  else {
-    setOperationAction(ISD::ROTL , MVT::i64, Expand);
-    setOperationAction(ISD::ROTR , MVT::i64, Expand);
+    setOperationAction(ISD::ROTL, MVT::i64, Legal);
+    setOperationAction(ISD::ROTR, MVT::i64, Legal);
+  } else {
+    setOperationAction(ISD::ROTL, MVT::i64, Expand);
+    setOperationAction(ISD::ROTR, MVT::i64, Expand);
   }
   if (nvptxSubtarget.hasROT32()) {
-    setOperationAction(ISD::ROTL , MVT::i32, Legal);
-    setOperationAction(ISD::ROTR , MVT::i32, Legal);
-  }
-  else {
-    setOperationAction(ISD::ROTL , MVT::i32, Expand);
-    setOperationAction(ISD::ROTR , MVT::i32, Expand);
+    setOperationAction(ISD::ROTL, MVT::i32, Legal);
+    setOperationAction(ISD::ROTR, MVT::i32, Legal);
+  } else {
+    setOperationAction(ISD::ROTL, MVT::i32, Expand);
+    setOperationAction(ISD::ROTR, MVT::i32, Expand);
   }
 
-  setOperationAction(ISD::ROTL , MVT::i16, Expand);
-  setOperationAction(ISD::ROTR , MVT::i16, Expand);
-  setOperationAction(ISD::ROTL , MVT::i8, Expand);
-  setOperationAction(ISD::ROTR , MVT::i8, Expand);
-  setOperationAction(ISD::BSWAP , MVT::i16, Expand);
-  setOperationAction(ISD::BSWAP , MVT::i32, Expand);
-  setOperationAction(ISD::BSWAP , MVT::i64, Expand);
+  setOperationAction(ISD::ROTL, MVT::i16, Expand);
+  setOperationAction(ISD::ROTR, MVT::i16, Expand);
+  setOperationAction(ISD::ROTL, MVT::i8, Expand);
+  setOperationAction(ISD::ROTR, MVT::i8, Expand);
+  setOperationAction(ISD::BSWAP, MVT::i16, Expand);
+  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+  setOperationAction(ISD::BSWAP, MVT::i64, Expand);
 
   // Indirect branch is not supported.
   // This also disables Jump Table creation.
-  setOperationAction(ISD::BR_JT,             MVT::Other, Expand);
-  setOperationAction(ISD::BRIND,             MVT::Other, Expand);
+  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+  setOperationAction(ISD::BRIND, MVT::Other, Expand);
 
-  setOperationAction(ISD::GlobalAddress   , MVT::i32  , Custom);
-  setOperationAction(ISD::GlobalAddress   , MVT::i64  , Custom);
+  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
 
   // We want to legalize constant related memmove and memcopy
   // intrinsics.
@@ -168,16 +164,16 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
   setTruncStoreAction(MVT::i8, MVT::i1, Expand);
 
   // This is legal in NVPTX
-  setOperationAction(ISD::ConstantFP,         MVT::f64, Legal);
-  setOperationAction(ISD::ConstantFP,         MVT::f32, Legal);
+  setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
 
   // TRAP can be lowered to PTX trap
-  setOperationAction(ISD::TRAP,               MVT::Other, Legal);
+  setOperationAction(ISD::TRAP, MVT::Other, Legal);
 
   // Register custom handling for vector loads/stores
-  for (int i = MVT::FIRST_VECTOR_VALUETYPE;
-       i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
-    MVT VT = (MVT::SimpleValueType)i;
+  for (int i = MVT::FIRST_VECTOR_VALUETYPE; i <= MVT::LAST_VECTOR_VALUETYPE;
+       ++i) {
+    MVT VT = (MVT::SimpleValueType) i;
     if (IsPTXVectorType(VT)) {
       setOperationAction(ISD::LOAD, VT, Custom);
       setOperationAction(ISD::STORE, VT, Custom);
@@ -190,49 +186,86 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
   computeRegisterProperties();
 }
 
-
 const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
   switch (Opcode) {
-  default: return 0;
-  case NVPTXISD::CALL:            return "NVPTXISD::CALL";
-  case NVPTXISD::RET_FLAG:        return "NVPTXISD::RET_FLAG";
-  case NVPTXISD::Wrapper:         return "NVPTXISD::Wrapper";
-  case NVPTXISD::NVBuiltin:       return "NVPTXISD::NVBuiltin";
-  case NVPTXISD::DeclareParam:    return "NVPTXISD::DeclareParam";
+  default:
+    return 0;
+  case NVPTXISD::CALL:
+    return "NVPTXISD::CALL";
+  case NVPTXISD::RET_FLAG:
+    return "NVPTXISD::RET_FLAG";
+  case NVPTXISD::Wrapper:
+    return "NVPTXISD::Wrapper";
+  case NVPTXISD::NVBuiltin:
+    return "NVPTXISD::NVBuiltin";
+  case NVPTXISD::DeclareParam:
+    return "NVPTXISD::DeclareParam";
   case NVPTXISD::DeclareScalarParam:
     return "NVPTXISD::DeclareScalarParam";
-  case NVPTXISD::DeclareRet:      return "NVPTXISD::DeclareRet";
-  case NVPTXISD::DeclareRetParam: return "NVPTXISD::DeclareRetParam";
-  case NVPTXISD::PrintCall:       return "NVPTXISD::PrintCall";
-  case NVPTXISD::LoadParam:       return "NVPTXISD::LoadParam";
-  case NVPTXISD::StoreParam:      return "NVPTXISD::StoreParam";
-  case NVPTXISD::StoreParamS32:   return "NVPTXISD::StoreParamS32";
-  case NVPTXISD::StoreParamU32:   return "NVPTXISD::StoreParamU32";
-  case NVPTXISD::MoveToParam:     return "NVPTXISD::MoveToParam";
-  case NVPTXISD::CallArgBegin:    return "NVPTXISD::CallArgBegin";
-  case NVPTXISD::CallArg:         return "NVPTXISD::CallArg";
-  case NVPTXISD::LastCallArg:     return "NVPTXISD::LastCallArg";
-  case NVPTXISD::CallArgEnd:      return "NVPTXISD::CallArgEnd";
-  case NVPTXISD::CallVoid:        return "NVPTXISD::CallVoid";
-  case NVPTXISD::CallVal:         return "NVPTXISD::CallVal";
-  case NVPTXISD::CallSymbol:      return "NVPTXISD::CallSymbol";
-  case NVPTXISD::Prototype:       return "NVPTXISD::Prototype";
-  case NVPTXISD::MoveParam:       return "NVPTXISD::MoveParam";
-  case NVPTXISD::MoveRetval:      return "NVPTXISD::MoveRetval";
-  case NVPTXISD::MoveToRetval:    return "NVPTXISD::MoveToRetval";
-  case NVPTXISD::StoreRetval:     return "NVPTXISD::StoreRetval";
-  case NVPTXISD::PseudoUseParam:  return "NVPTXISD::PseudoUseParam";
-  case NVPTXISD::RETURN:          return "NVPTXISD::RETURN";
-  case NVPTXISD::CallSeqBegin:    return "NVPTXISD::CallSeqBegin";
-  case NVPTXISD::CallSeqEnd:      return "NVPTXISD::CallSeqEnd";
-  case NVPTXISD::LoadV2:          return "NVPTXISD::LoadV2";
-  case NVPTXISD::LoadV4:          return "NVPTXISD::LoadV4";
-  case NVPTXISD::LDGV2:           return "NVPTXISD::LDGV2";
-  case NVPTXISD::LDGV4:           return "NVPTXISD::LDGV4";
-  case NVPTXISD::LDUV2:           return "NVPTXISD::LDUV2";
-  case NVPTXISD::LDUV4:           return "NVPTXISD::LDUV4";
-  case NVPTXISD::StoreV2:         return "NVPTXISD::StoreV2";
-  case NVPTXISD::StoreV4:         return "NVPTXISD::StoreV4";
+  case NVPTXISD::DeclareRet:
+    return "NVPTXISD::DeclareRet";
+  case NVPTXISD::DeclareRetParam:
+    return "NVPTXISD::DeclareRetParam";
+  case NVPTXISD::PrintCall:
+    return "NVPTXISD::PrintCall";
+  case NVPTXISD::LoadParam:
+    return "NVPTXISD::LoadParam";
+  case NVPTXISD::StoreParam:
+    return "NVPTXISD::StoreParam";
+  case NVPTXISD::StoreParamS32:
+    return "NVPTXISD::StoreParamS32";
+  case NVPTXISD::StoreParamU32:
+    return "NVPTXISD::StoreParamU32";
+  case NVPTXISD::MoveToParam:
+    return "NVPTXISD::MoveToParam";
+  case NVPTXISD::CallArgBegin:
+    return "NVPTXISD::CallArgBegin";
+  case NVPTXISD::CallArg:
+    return "NVPTXISD::CallArg";
+  case NVPTXISD::LastCallArg:
+    return "NVPTXISD::LastCallArg";
+  case NVPTXISD::CallArgEnd:
+    return "NVPTXISD::CallArgEnd";
+  case NVPTXISD::CallVoid:
+    return "NVPTXISD::CallVoid";
+  case NVPTXISD::CallVal:
+    return "NVPTXISD::CallVal";
+  case NVPTXISD::CallSymbol:
+    return "NVPTXISD::CallSymbol";
+  case NVPTXISD::Prototype:
+    return "NVPTXISD::Prototype";
+  case NVPTXISD::MoveParam:
+    return "NVPTXISD::MoveParam";
+  case NVPTXISD::MoveRetval:
+    return "NVPTXISD::MoveRetval";
+  case NVPTXISD::MoveToRetval:
+    return "NVPTXISD::MoveToRetval";
+  case NVPTXISD::StoreRetval:
+    return "NVPTXISD::StoreRetval";
+  case NVPTXISD::PseudoUseParam:
+    return "NVPTXISD::PseudoUseParam";
+  case NVPTXISD::RETURN:
+    return "NVPTXISD::RETURN";
+  case NVPTXISD::CallSeqBegin:
+    return "NVPTXISD::CallSeqBegin";
+  case NVPTXISD::CallSeqEnd:
+    return "NVPTXISD::CallSeqEnd";
+  case NVPTXISD::LoadV2:
+    return "NVPTXISD::LoadV2";
+  case NVPTXISD::LoadV4:
+    return "NVPTXISD::LoadV4";
+  case NVPTXISD::LDGV2:
+    return "NVPTXISD::LDGV2";
+  case NVPTXISD::LDGV4:
+    return "NVPTXISD::LDGV4";
+  case NVPTXISD::LDUV2:
+    return "NVPTXISD::LDUV2";
+  case NVPTXISD::LDUV4:
+    return "NVPTXISD::LDUV4";
+  case NVPTXISD::StoreV2:
+    return "NVPTXISD::StoreV2";
+  case NVPTXISD::StoreV4:
+    return "NVPTXISD::StoreV4";
   }
 }
 
@@ -248,10 +281,9 @@ NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
   return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op);
 }
 
-std::string NVPTXTargetLowering::getPrototype(Type *retTy,
-                                              const ArgListTy &Args,
-                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
-                                              unsigned retAlignment) const {
+std::string NVPTXTargetLowering::getPrototype(
+    Type *retTy, const ArgListTy &Args,
+    const SmallVectorImpl<ISD::OutputArg> &Outs, unsigned retAlignment) const {
 
   bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
 
@@ -267,54 +299,47 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy,
         unsigned size = 0;
         if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) {
           size = ITy->getBitWidth();
-          if (size < 32) size = 32;
-        }
-        else {
+          if (size < 32)
+            size = 32;
+        } else {
           assert(retTy->isFloatingPointTy() &&
                  "Floating point type expected here");
           size = retTy->getPrimitiveSizeInBits();
         }
 
         O << ".param .b" << size << " _";
-      }
-      else if (isa<PointerType>(retTy))
-        O << ".param .b" << getPointerTy().getSizeInBits()
-        << " _";
+      } else if (isa<PointerType>(retTy))
+        O << ".param .b" << getPointerTy().getSizeInBits() << " _";
       else {
         if ((retTy->getTypeID() == Type::StructTyID) ||
             isa<VectorType>(retTy)) {
           SmallVector<EVT, 16> vtparts;
           ComputeValueVTs(*this, retTy, vtparts);
           unsigned totalsz = 0;
-          for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
+          for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
             unsigned elems = 1;
             EVT elemtype = vtparts[i];
             if (vtparts[i].isVector()) {
               elems = vtparts[i].getVectorNumElements();
               elemtype = vtparts[i].getVectorElementType();
             }
-            for (unsigned j=0, je=elems; j!=je; ++j) {
+            for (unsigned j = 0, je = elems; j != je; ++j) {
               unsigned sz = elemtype.getSizeInBits();
-              if (elemtype.isInteger() && (sz < 8)) sz = 8;
-              totalsz += sz/8;
+              if (elemtype.isInteger() && (sz < 8))
+                sz = 8;
+              totalsz += sz / 8;
             }
           }
-          O << ".param .align "
-              << retAlignment
-              << " .b8 _["
-              << totalsz << "]";
-        }
-        else {
-          assert(false &&
-                 "Unknown return type");
+          O << ".param .align " << retAlignment << " .b8 _[" << totalsz << "]";
+        } else {
+          assert(false && "Unknown return type");
         }
       }
-    }
-    else {
+    } else {
       SmallVector<EVT, 16> vtparts;
       ComputeValueVTs(*this, retTy, vtparts);
       unsigned idx = 0;
-      for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
+      for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
         unsigned elems = 1;
         EVT elemtype = vtparts[i];
         if (vtparts[i].isVector()) {
@@ -322,14 +347,16 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy,
           elemtype = vtparts[i].getVectorElementType();
         }
 
-        for (unsigned j=0, je=elems; j!=je; ++j) {
+        for (unsigned j = 0, je = elems; j != je; ++j) {
           unsigned sz = elemtype.getSizeInBits();
-          if (elemtype.isInteger() && (sz < 32)) sz = 32;
+          if (elemtype.isInteger() && (sz < 32))
+            sz = 32;
           O << ".reg .b" << sz << " _";
-          if (j<je-1) O << ", ";
+          if (j < je - 1)
+            O << ", ";
           ++idx;
         }
-        if (i < e-1)
+        if (i < e - 1)
           O << ", ";
       }
     }
@@ -340,7 +367,7 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy,
   bool first = true;
   MVT thePointerTy = getPointerTy();
 
-  for (unsigned i=0,e=Args.size(); i!=e; ++i) {
+  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
     const Type *Ty = Args[i].Ty;
     if (!first) {
       O << ", ";
@@ -351,9 +378,9 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy,
       unsigned sz = 0;
       if (isa<IntegerType>(Ty)) {
         sz = cast<IntegerType>(Ty)->getBitWidth();
-        if (sz < 32) sz = 32;
-      }
-      else if (isa<PointerType>(Ty))
+        if (sz < 32)
+          sz = 32;
+      } else if (isa<PointerType>(Ty))
         sz = thePointerTy.getSizeInBits();
       else
         sz = Ty->getPrimitiveSizeInBits();
@@ -365,23 +392,20 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy,
       continue;
     }
     const PointerType *PTy = dyn_cast<PointerType>(Ty);
-    assert(PTy &&
-           "Param with byval attribute should be a pointer type");
+    assert(PTy && "Param with byval attribute should be a pointer type");
     Type *ETy = PTy->getElementType();
 
     if (isABI) {
       unsigned align = Outs[i].Flags.getByValAlign();
       unsigned sz = getDataLayout()->getTypeAllocSize(ETy);
-      O << ".param .align " << align
-          << " .b8 ";
+      O << ".param .align " << align << " .b8 ";
       O << "_";
       O << "[" << sz << "]";
       continue;
-    }
-    else {
+    } else {
       SmallVector<EVT, 16> vtparts;
       ComputeValueVTs(*this, ETy, vtparts);
-      for (unsigned i=0,e=vtparts.size(); i!=e; ++i) {
+      for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
         unsigned elems = 1;
         EVT elemtype = vtparts[i];
         if (vtparts[i].isVector()) {
@@ -389,14 +413,16 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy,
           elemtype = vtparts[i].getVectorElementType();
         }
 
-        for (unsigned j=0,je=elems; j!=je; ++j) {
+        for (unsigned j = 0, je = elems; j != je; ++j) {
           unsigned sz = elemtype.getSizeInBits();
-          if (elemtype.isInteger() && (sz < 32)) sz = 32;
+          if (elemtype.isInteger() && (sz < 32))
+            sz = 32;
           O << ".reg .b" << sz << " ";
           O << "_";
-          if (j<je-1) O << ", ";
+          if (j < je - 1)
+            O << ", ";
         }
-        if (i<e-1)
+        if (i < e - 1)
           O << ", ";
       }
       continue;
@@ -406,27 +432,25 @@ std::string NVPTXTargetLowering::getPrototype(Type *retTy,
   return O.str();
 }
 
-
-SDValue
-NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
-                               SmallVectorImpl<SDValue> &InVals) const {
-  SelectionDAG &DAG                     = CLI.DAG;
-  DebugLoc &dl                          = CLI.DL;
+SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
+                                       SmallVectorImpl<SDValue> &InVals) const {
+  SelectionDAG &DAG = CLI.DAG;
+  DebugLoc &dl = CLI.DL;
   SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
-  SmallVector<SDValue, 32> &OutVals     = CLI.OutVals;
-  SmallVector<ISD::InputArg, 32> &Ins   = CLI.Ins;
-  SDValue Chain                         = CLI.Chain;
-  SDValue Callee                        = CLI.Callee;
-  bool &isTailCall                      = CLI.IsTailCall;
-  ArgListTy &Args                       = CLI.Args;
-  Type *retTy                           = CLI.RetTy;
-  ImmutableCallSite *CS                 = CLI.CS;
+  SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
+  SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+  SDValue Chain = CLI.Chain;
+  SDValue Callee = CLI.Callee;
+  bool &isTailCall = CLI.IsTailCall;
+  ArgListTy &Args = CLI.Args;
+  Type *retTy = CLI.RetTy;
+  ImmutableCallSite *CS = CLI.CS;
 
   bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
 
   SDValue tempChain = Chain;
-  Chain = DAG.getCALLSEQ_START(Chain,
-                               DAG.getIntPtrConstant(uniqueCallSite, true));
+  Chain =
+      DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(uniqueCallSite, true));
   SDValue InFlag = Chain.getValue(1);
 
   assert((Outs.size() == Args.size()) &&
@@ -434,7 +458,7 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   unsigned paramCount = 0;
   // Declare the .params or .reg need to pass values
   // to the function
-  for (unsigned i=0, e=Outs.size(); i!=e; ++i) {
+  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
     EVT VT = Outs[i].VT;
 
     if (Outs[i].Flags.isByVal() == false) {
@@ -445,19 +469,20 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
       if (isABI)
         isReg = 0;
       unsigned sz = VT.getSizeInBits();
-      if (VT.isInteger() && (sz < 32)) sz = 32;
+      if (VT.isInteger() && (sz < 32))
+        sz = 32;
       SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
       SDValue DeclareParamOps[] = { Chain,
                                     DAG.getConstant(paramCount, MVT::i32),
                                     DAG.getConstant(sz, MVT::i32),
-                                    DAG.getConstant(isReg, MVT::i32),
-                                    InFlag };
+                                    DAG.getConstant(isReg, MVT::i32), InFlag };
       Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
                           DeclareParamOps, 5);
       InFlag = Chain.getValue(1);
       SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
       SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
-                             DAG.getConstant(0, MVT::i32), OutVals[i], InFlag };
+                                 DAG.getConstant(0, MVT::i32), OutVals[i],
+                                 InFlag };
 
       unsigned opcode = NVPTXISD::StoreParam;
       if (isReg)
@@ -477,8 +502,7 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
     // struct or vector
     SmallVector<EVT, 16> vtparts;
     const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty);
-    assert(PTy &&
-           "Type of a byval parameter should be pointer");
+    assert(PTy && "Type of a byval parameter should be pointer");
     ComputeValueVTs(*this, PTy->getElementType(), vtparts);
 
     if (isABI) {
@@ -488,40 +512,41 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
       // The ByValAlign in the Outs[i].Flags is alway set at this point, so we
       // don't need to
       // worry about natural alignment or not. See TargetLowering::LowerCallTo()
-      SDValue DeclareParamOps[] = { Chain,
-                       DAG.getConstant(Outs[i].Flags.getByValAlign(), MVT::i32),
-                                    DAG.getConstant(paramCount, MVT::i32),
-                                    DAG.getConstant(sz, MVT::i32),
-                                    InFlag };
+      SDValue DeclareParamOps[] = {
+        Chain, DAG.getConstant(Outs[i].Flags.getByValAlign(), MVT::i32),
+        DAG.getConstant(paramCount, MVT::i32), DAG.getConstant(sz, MVT::i32),
+        InFlag
+      };
       Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
                           DeclareParamOps, 5);
       InFlag = Chain.getValue(1);
       unsigned curOffset = 0;
-      for (unsigned j=0,je=vtparts.size(); j!=je; ++j) {
+      for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
         unsigned elems = 1;
         EVT elemtype = vtparts[j];
         if (vtparts[j].isVector()) {
           elems = vtparts[j].getVectorNumElements();
           elemtype = vtparts[j].getVectorElementType();
         }
-        for (unsigned k=0,ke=elems; k!=ke; ++k) {
+        for (unsigned k = 0, ke = elems; k != ke; ++k) {
           unsigned sz = elemtype.getSizeInBits();
-          if (elemtype.isInteger() && (sz < 8)) sz = 8;
-          SDValue srcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(),
-                                        OutVals[i],
-                                        DAG.getConstant(curOffset,
-                                                        getPointerTy()));
-          SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
-                                MachinePointerInfo(), false, false, false, 0);
+          if (elemtype.isInteger() && (sz < 8))
+            sz = 8;
+          SDValue srcAddr =
+              DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i],
+                          DAG.getConstant(curOffset, getPointerTy()));
+          SDValue theVal =
+              DAG.getLoad(elemtype, dl, tempChain, srcAddr,
+                          MachinePointerInfo(), false, false, false, 0);
           SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
-          SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount,
-                                                            MVT::i32),
-                                           DAG.getConstant(curOffset, MVT::i32),
-                                                            theVal, InFlag };
+          SDValue CopyParamOps[] = { Chain,
+                                     DAG.getConstant(paramCount, MVT::i32),
+                                     DAG.getConstant(curOffset, MVT::i32),
+                                     theVal, InFlag };
           Chain = DAG.getNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
                               CopyParamOps, 5);
           InFlag = Chain.getValue(1);
-          curOffset += sz/8;
+          curOffset += sz / 8;
         }
       }
       ++paramCount;
@@ -530,30 +555,31 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
     // Non-abi, struct or vector
     // Declare a bunch or .reg .b<size> .param<n>
     unsigned curOffset = 0;
-    for (unsigned j=0,je=vtparts.size(); j!=je; ++j) {
+    for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
       unsigned elems = 1;
       EVT elemtype = vtparts[j];
       if (vtparts[j].isVector()) {
         elems = vtparts[j].getVectorNumElements();
         elemtype = vtparts[j].getVectorElementType();
       }
-      for (unsigned k=0,ke=elems; k!=ke; ++k) {
+      for (unsigned k = 0, ke = elems; k != ke; ++k) {
         unsigned sz = elemtype.getSizeInBits();
-        if (elemtype.isInteger() && (sz < 32)) sz = 32;
+        if (elemtype.isInteger() && (sz < 32))
+          sz = 32;
         SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
-        SDValue DeclareParamOps[] = { Chain, DAG.getConstant(paramCount,
-                                                             MVT::i32),
-                                                  DAG.getConstant(sz, MVT::i32),
-                                                   DAG.getConstant(1, MVT::i32),
-                                                             InFlag };
+        SDValue DeclareParamOps[] = { Chain,
+                                      DAG.getConstant(paramCount, MVT::i32),
+                                      DAG.getConstant(sz, MVT::i32),
+                                      DAG.getConstant(1, MVT::i32), InFlag };
         Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
                             DeclareParamOps, 5);
         InFlag = Chain.getValue(1);
-        SDValue srcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i],
-                                      DAG.getConstant(curOffset,
-                                                      getPointerTy()));
-        SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
-                                  MachinePointerInfo(), false, false, false, 0);
+        SDValue srcAddr =
+            DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i],
+                        DAG.getConstant(curOffset, getPointerTy()));
+        SDValue theVal =
+            DAG.getLoad(elemtype, dl, tempChain, srcAddr, MachinePointerInfo(),
+                        false, false, false, 0);
         SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
         SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
                                    DAG.getConstant(0, MVT::i32), theVal,
@@ -578,20 +604,21 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
     // Declare one .param .align 16 .b8 func_retval0[<size>] for ABI or
     // individual .reg .b<size> func_retval<0..> for non ABI
     unsigned resultsz = 0;
-    for (unsigned i=0,e=resvtparts.size(); i!=e; ++i) {
+    for (unsigned i = 0, e = resvtparts.size(); i != e; ++i) {
       unsigned elems = 1;
       EVT elemtype = resvtparts[i];
       if (resvtparts[i].isVector()) {
         elems = resvtparts[i].getVectorNumElements();
         elemtype = resvtparts[i].getVectorElementType();
       }
-      for (unsigned j=0,je=elems; j!=je; ++j) {
+      for (unsigned j = 0, je = elems; j != je; ++j) {
         unsigned sz = elemtype.getSizeInBits();
         if (isABI == false) {
-          if (elemtype.isInteger() && (sz < 32)) sz = 32;
-        }
-        else {
-          if (elemtype.isInteger() && (sz < 8)) sz = 8;
+          if (elemtype.isInteger() && (sz < 32))
+            sz = 32;
+        } else {
+          if (elemtype.isInteger() && (sz < 8))
+            sz = 8;
         }
         if (isABI == false) {
           SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
@@ -609,7 +636,7 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
     }
     if (isABI) {
       if (retTy->isPrimitiveType() || retTy->isIntegerTy() ||
-          retTy->isPointerTy() ) {
+          retTy->isPointerTy()) {
         // Scalar needs to be at least 32bit wide
         if (resultsz < 32)
           resultsz = 32;
@@ -620,8 +647,7 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
         Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
                             DeclareRetOps, 5);
         InFlag = Chain.getValue(1);
-      }
-      else {
+      } else {
         if (Func) { // direct call
           if (!llvm::getAlign(*(CS->getCalledFunction()), 0, retAlignment))
             retAlignment = getDataLayout()->getABITypeAlignment(retTy);
@@ -631,10 +657,10 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
             retAlignment = getDataLayout()->getABITypeAlignment(retTy);
         }
         SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
-        SDValue DeclareRetOps[] = { Chain, DAG.getConstant(retAlignment,
-                                                           MVT::i32),
-                                          DAG.getConstant(resultsz/8, MVT::i32),
-                                         DAG.getConstant(0, MVT::i32), InFlag };
+        SDValue DeclareRetOps[] = { Chain,
+                                    DAG.getConstant(retAlignment, MVT::i32),
+                                    DAG.getConstant(resultsz / 8, MVT::i32),
+                                    DAG.getConstant(0, MVT::i32), InFlag };
         Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs,
                             DeclareRetOps, 5);
         InFlag = Chain.getValue(1);
@@ -652,24 +678,24 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
     // INLINEASM SDNode.
     SDVTList InlineAsmVTs = DAG.getVTList(MVT::Other, MVT::Glue);
     std::string proto_string = getPrototype(retTy, Args, Outs, retAlignment);
-    const char *asmstr = nvTM->getManagedStrPool()->
-        getManagedString(proto_string.c_str())->c_str();
-    SDValue InlineAsmOps[] = { Chain,
-                               DAG.getTargetExternalSymbol(asmstr,
-                                                           getPointerTy()),
-                                                           DAG.getMDNode(0),
-                                   DAG.getTargetConstant(0, MVT::i32), InFlag };
+    const char *asmstr = nvTM->getManagedStrPool()
+        ->getManagedString(proto_string.c_str())->c_str();
+    SDValue InlineAsmOps[] = {
+      Chain, DAG.getTargetExternalSymbol(asmstr, getPointerTy()),
+      DAG.getMDNode(0), DAG.getTargetConstant(0, MVT::i32), InFlag
+    };
     Chain = DAG.getNode(ISD::INLINEASM, dl, InlineAsmVTs, InlineAsmOps, 5);
     InFlag = Chain.getValue(1);
   }
   // Op to just print "call"
   SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue);
-  SDValue PrintCallOps[] = { Chain,
-                             DAG.getConstant(isABI ? ((Ins.size()==0) ? 0 : 1)
-                                 : retCount, MVT::i32),
-                                   InFlag };
-  Chain = DAG.getNode(Func?(NVPTXISD::PrintCallUni):(NVPTXISD::PrintCall), dl,
-      PrintCallVTs, PrintCallOps, 3);
+  SDValue PrintCallOps[] = {
+    Chain,
+    DAG.getConstant(isABI ? ((Ins.size() == 0) ? 0 : 1) : retCount, MVT::i32),
+    InFlag
+  };
+  Chain = DAG.getNode(Func ? (NVPTXISD::PrintCallUni) : (NVPTXISD::PrintCall),
+                      dl, PrintCallVTs, PrintCallOps, 3);
   InFlag = Chain.getValue(1);
 
   // Ops to print out the function name
@@ -685,31 +711,28 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                       CallArgBeginOps, 2);
   InFlag = Chain.getValue(1);
 
-  for (unsigned i=0, e=paramCount; i!=e; ++i) {
+  for (unsigned i = 0, e = paramCount; i != e; ++i) {
     unsigned opcode;
-    if (i==(e-1))
+    if (i == (e - 1))
       opcode = NVPTXISD::LastCallArg;
     else
       opcode = NVPTXISD::CallArg;
     SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue);
     SDValue CallArgOps[] = { Chain, DAG.getConstant(1, MVT::i32),
-                             DAG.getConstant(i, MVT::i32),
-                             InFlag };
+                             DAG.getConstant(i, MVT::i32), InFlag };
     Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps, 4);
     InFlag = Chain.getValue(1);
   }
   SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue);
-  SDValue CallArgEndOps[] = { Chain,
-                              DAG.getConstant(Func ? 1 : 0, MVT::i32),
+  SDValue CallArgEndOps[] = { Chain, DAG.getConstant(Func ? 1 : 0, MVT::i32),
                               InFlag };
-  Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps,
-                      3);
+  Chain =
+      DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps, 3);
   InFlag = Chain.getValue(1);
 
   if (!Func) {
     SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);
-    SDValue PrototypeOps[] = { Chain,
-                               DAG.getConstant(uniqueCallSite, MVT::i32),
+    SDValue PrototypeOps[] = { Chain, DAG.getConstant(uniqueCallSite, MVT::i32),
                                InFlag };
     Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps, 3);
     InFlag = Chain.getValue(1);
@@ -719,32 +742,28 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   if (Ins.size() > 0) {
     if (isABI) {
       unsigned resoffset = 0;
-      for (unsigned i=0,e=Ins.size(); i!=e; ++i) {
+      for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
         unsigned sz = Ins[i].VT.getSizeInBits();
-        if (Ins[i].VT.isInteger() && (sz < 8)) sz = 8;
+        if (Ins[i].VT.isInteger() && (sz < 8))
+          sz = 8;
         EVT LoadRetVTs[] = { Ins[i].VT, MVT::Other, MVT::Glue };
-        SDValue LoadRetOps[] = {
-          Chain,
-          DAG.getConstant(1, MVT::i32),
-          DAG.getConstant(resoffset, MVT::i32),
-          InFlag
-        };
+        SDValue LoadRetOps[] = { Chain, DAG.getConstant(1, MVT::i32),
+                                 DAG.getConstant(resoffset, MVT::i32), InFlag };
         SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, LoadRetVTs,
                                      LoadRetOps, array_lengthof(LoadRetOps));
         Chain = retval.getValue(1);
         InFlag = retval.getValue(2);
         InVals.push_back(retval);
-        resoffset += sz/8;
+        resoffset += sz / 8;
       }
-    }
-    else {
+    } else {
       SmallVector<EVT, 16> resvtparts;
       ComputeValueVTs(*this, retTy, resvtparts);
 
       assert(Ins.size() == resvtparts.size() &&
              "Unexpected number of return values in non-ABI case");
       unsigned paramNum = 0;
-      for (unsigned i=0,e=Ins.size(); i!=e; ++i) {
+      for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
         assert(EVT(Ins[i].VT) == resvtparts[i] &&
                "Unexpected EVT type in non-ABI case");
         unsigned numelems = 1;
@@ -754,14 +773,11 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
           elemtype = Ins[i].VT.getVectorElementType();
         }
         std::vector<SDValue> tempRetVals;
-        for (unsigned j=0; j<numelems; ++j) {
+        for (unsigned j = 0; j < numelems; ++j) {
           EVT MoveRetVTs[] = { elemtype, MVT::Other, MVT::Glue };
-          SDValue MoveRetOps[] = {
-            Chain,
-            DAG.getConstant(0, MVT::i32),
-            DAG.getConstant(paramNum, MVT::i32),
-            InFlag
-          };
+          SDValue MoveRetOps[] = { Chain, DAG.getConstant(0, MVT::i32),
+                                   DAG.getConstant(paramNum, MVT::i32),
+                                   InFlag };
           SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, MoveRetVTs,
                                        MoveRetOps, array_lengthof(MoveRetOps));
           Chain = retval.getValue(1);
@@ -777,9 +793,8 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
       }
     }
   }
-  Chain = DAG.getCALLSEQ_END(Chain,
-                             DAG.getIntPtrConstant(uniqueCallSite, true),
-                             DAG.getIntPtrConstant(uniqueCallSite+1, true),
+  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(uniqueCallSite, true),
+                             DAG.getIntPtrConstant(uniqueCallSite + 1, true),
                              InFlag);
   uniqueCallSite++;
 
@@ -792,45 +807,51 @@ NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
 // By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack()
 // (see LegalizeDAG.cpp). This is slow and uses local memory.
 // We use extract/insert/build vector just as what LegalizeOp() does in llvm 2.5
-SDValue NVPTXTargetLowering::
-LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
+SDValue
+NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
   SDNode *Node = Op.getNode();
   DebugLoc dl = Node->getDebugLoc();
   SmallVector<SDValue, 8> Ops;
   unsigned NumOperands = Node->getNumOperands();
-  for (unsigned i=0; i < NumOperands; ++i) {
+  for (unsigned i = 0; i < NumOperands; ++i) {
     SDValue SubOp = Node->getOperand(i);
     EVT VVT = SubOp.getNode()->getValueType(0);
     EVT EltVT = VVT.getVectorElementType();
     unsigned NumSubElem = VVT.getVectorNumElements();
-    for (unsigned j=0; j < NumSubElem; ++j) {
+    for (unsigned j = 0; j < NumSubElem; ++j) {
       Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp,
                                 DAG.getIntPtrConstant(j)));
     }
   }
-  return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0),
-                     &Ops[0], Ops.size());
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), &Ops[0],
+                     Ops.size());
 }
 
-SDValue NVPTXTargetLowering::
-LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+SDValue
+NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
-  case ISD::RETURNADDR: return SDValue();
-  case ISD::FRAMEADDR:  return SDValue();
-  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
-  case ISD::INTRINSIC_W_CHAIN: return Op;
+  case ISD::RETURNADDR:
+    return SDValue();
+  case ISD::FRAMEADDR:
+    return SDValue();
+  case ISD::GlobalAddress:
+    return LowerGlobalAddress(Op, DAG);
+  case ISD::INTRINSIC_W_CHAIN:
+    return Op;
   case ISD::BUILD_VECTOR:
   case ISD::EXTRACT_SUBVECTOR:
     return Op;
-  case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
-  case ISD::STORE: return LowerSTORE(Op, DAG);
-  case ISD::LOAD: return LowerLOAD(Op, DAG);
+  case ISD::CONCAT_VECTORS:
+    return LowerCONCAT_VECTORS(Op, DAG);
+  case ISD::STORE:
+    return LowerSTORE(Op, DAG);
+  case ISD::LOAD:
+    return LowerLOAD(Op, DAG);
   default:
     llvm_unreachable("Custom lowering not defined for operation");
   }
 }
 
-
 SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
   if (Op.getValueType() == MVT::i1)
     return LowerLOADi1(Op, DAG);
@@ -842,24 +863,22 @@ SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
 //   =>
 // v1 = ld i8* addr
 // v = trunc v1 to i1
-SDValue NVPTXTargetLowering::
-LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
+SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
   SDNode *Node = Op.getNode();
   LoadSDNode *LD = cast<LoadSDNode>(Node);
   DebugLoc dl = Node->getDebugLoc();
-  assert(LD->getExtensionType() == ISD::NON_EXTLOAD) ;
+  assert(LD->getExtensionType() == ISD::NON_EXTLOAD);
   assert(Node->getValueType(0) == MVT::i1 &&
          "Custom lowering for i1 load only");
-  SDValue newLD = DAG.getLoad(MVT::i8, dl, LD->getChain(), LD->getBasePtr(),
-                              LD->getPointerInfo(),
-                              LD->isVolatile(), LD->isNonTemporal(),
-                              LD->isInvariant(),
-                              LD->getAlignment());
+  SDValue newLD =
+      DAG.getLoad(MVT::i8, dl, LD->getChain(), LD->getBasePtr(),
+                  LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(),
+                  LD->isInvariant(), LD->getAlignment());
   SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD);
   // The legalizer (the caller) is expecting two values from the legalized
   // load, so we build a MergeValues node for it. See ExpandUnalignedLoad()
   // in LegalizeDAG.cpp which also uses MergeValues.
-  SDValue Ops[] = {result, LD->getChain()};
+  SDValue Ops[] = { result, LD->getChain() };
   return DAG.getMergeValues(Ops, 2, dl);
 }
 
@@ -887,7 +906,8 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
     if (!ValVT.isSimple())
       return SDValue();
     switch (ValVT.getSimpleVT().SimpleTy) {
-    default: return SDValue();
+    default:
+      return SDValue();
     case MVT::v2i8:
     case MVT::v2i16:
     case MVT::v2i32:
@@ -914,7 +934,8 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
       NeedExt = true;
 
     switch (NumElts) {
-    default:  return SDValue();
+    default:
+      return SDValue();
     case 2:
       Opcode = NVPTXISD::StoreV2;
       break;
@@ -947,11 +968,9 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
 
     MemSDNode *MemSD = cast<MemSDNode>(N);
 
-    SDValue NewSt = DAG.getMemIntrinsicNode(Opcode, DL,
-                                            DAG.getVTList(MVT::Other), &Ops[0],
-                                            Ops.size(), MemSD->getMemoryVT(),
-                                            MemSD->getMemOperand());
-
+    SDValue NewSt = DAG.getMemIntrinsicNode(
+        Opcode, DL, DAG.getVTList(MVT::Other), &Ops[0], Ops.size(),
+        MemSD->getMemoryVT(), MemSD->getMemOperand());
 
     //return DCI.CombineTo(N, NewSt, true);
     return NewSt;
@@ -964,8 +983,7 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
 //    =>
 // v1 = zxt v to i8
 // st i8, addr
-SDValue NVPTXTargetLowering::
-LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
+SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
   SDNode *Node = Op.getNode();
   DebugLoc dl = Node->getDebugLoc();
   StoreSDNode *ST = cast<StoreSDNode>(Node);
@@ -976,18 +994,14 @@ LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
   unsigned Alignment = ST->getAlignment();
   bool isVolatile = ST->isVolatile();
   bool isNonTemporal = ST->isNonTemporal();
-  Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl,
-                     MVT::i8, Tmp3);
-  SDValue Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2,
-                                ST->getPointerInfo(), isVolatile,
-                                isNonTemporal, Alignment);
+  Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, Tmp3);
+  SDValue Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+                                isVolatile, isNonTemporal, Alignment);
   return Result;
 }
 
-
-SDValue
-NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname, int idx,
-                                EVT v) const {
+SDValue NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname,
+                                        int idx, EVT v) const {
   std::string *name = nvTM->getManagedStrPool()->getManagedString(inname);
   std::stringstream suffix;
   suffix << idx;
@@ -1000,19 +1014,16 @@ NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const {
   return getExtSymb(DAG, ".PARAM", idx, v);
 }
 
-SDValue
-NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) {
+SDValue NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) {
   return getExtSymb(DAG, ".HLPPARAM", idx);
 }
 
 // Check to see if the kernel argument is image*_t or sampler_t
 
 bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) {
-  static const char *const specialTypes[] = {
-                                             "struct._image2d_t",
-                                             "struct._image3d_t",
-                                             "struct._sampler_t"
-  };
+  static const char *const specialTypes[] = { "struct._image2d_t",
+                                              "struct._image3d_t",
+                                              "struct._sampler_t" };
 
   const Type *Ty = arg->getType();
   const PointerType *PTy = dyn_cast<PointerType>(Ty);
@@ -1033,12 +1044,10 @@ bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) {
   return false;
 }
 
-SDValue
-NVPTXTargetLowering::LowerFormalArguments(SDValue Chain,
-                                        CallingConv::ID CallConv, bool isVarArg,
-                                      const SmallVectorImpl<ISD::InputArg> &Ins,
-                                          DebugLoc dl, SelectionDAG &DAG,
-                                       SmallVectorImpl<SDValue> &InVals) const {
+SDValue NVPTXTargetLowering::LowerFormalArguments(
+    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+    const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG,
+    SmallVectorImpl<SDValue> &InVals) const {
   MachineFunction &MF = DAG.getMachineFunction();
   const DataLayout *TD = getDataLayout();
 
@@ -1054,34 +1063,43 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain,
   std::vector<Type *> argTypes;
   std::vector<const Argument *> theArgs;
   for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
-      I != E; ++I) {
+       I != E; ++I) {
     theArgs.push_back(I);
     argTypes.push_back(I->getType());
   }
-  assert(argTypes.size() == Ins.size() &&
-         "Ins types and function types did not match");
+  //assert(argTypes.size() == Ins.size() &&
+  //       "Ins types and function types did not match");
 
   int idx = 0;
-  for (unsigned i=0, e=Ins.size(); i!=e; ++i, ++idx) {
+  for (unsigned i = 0, e = argTypes.size(); i != e; ++i, ++idx) {
     Type *Ty = argTypes[i];
     EVT ObjectVT = getValueType(Ty);
-    assert(ObjectVT == Ins[i].VT &&
-           "Ins type did not match function type");
+    //assert(ObjectVT == Ins[i].VT &&
+    //       "Ins type did not match function type");
 
     // If the kernel argument is image*_t or sampler_t, convert it to
     // a i32 constant holding the parameter position. This can later
     // matched in the AsmPrinter to output the correct mangled name.
-    if (isImageOrSamplerVal(theArgs[i],
-                           (theArgs[i]->getParent() ?
-                               theArgs[i]->getParent()->getParent() : 0))) {
+    if (isImageOrSamplerVal(
+            theArgs[i],
+            (theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent()
+                                     : 0))) {
       assert(isKernel && "Only kernels can have image/sampler params");
-      InVals.push_back(DAG.getConstant(i+1, MVT::i32));
+      InVals.push_back(DAG.getConstant(i + 1, MVT::i32));
       continue;
     }
 
     if (theArgs[i]->use_empty()) {
       // argument is dead
-      InVals.push_back(DAG.getNode(ISD::UNDEF, dl, ObjectVT));
+      if (ObjectVT.isVector()) {
+        EVT EltVT = ObjectVT.getVectorElementType();
+        unsigned NumElts = ObjectVT.getVectorNumElements();
+        for (unsigned vi = 0; vi < NumElts; ++vi) {
+          InVals.push_back(DAG.getNode(ISD::UNDEF, dl, EltVT));
+        }
+      } else {
+        InVals.push_back(DAG.getNode(ISD::UNDEF, dl, ObjectVT));
+      }
       continue;
     }
 
@@ -1089,31 +1107,52 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain,
     // to newly created nodes. The SDNOdes for params have to
     // appear in the same order as their order of appearance
     // in the original function. "idx+1" holds that order.
-    if (PAL.hasAttribute(i+1, Attribute::ByVal) == false) {
+    if (PAL.hasAttribute(i + 1, Attribute::ByVal) == false) {
+      if (ObjectVT.isVector()) {
+        unsigned NumElts = ObjectVT.getVectorNumElements();
+        EVT EltVT = ObjectVT.getVectorElementType();
+        unsigned Offset = 0;
+        for (unsigned vi = 0; vi < NumElts; ++vi) {
+          SDValue A = getParamSymbol(DAG, idx, getPointerTy());
+          SDValue B = DAG.getIntPtrConstant(Offset);
+          SDValue Addr = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+                                     //getParamSymbol(DAG, idx, EltVT),
+                                     //DAG.getConstant(Offset, getPointerTy()));
+                                     A, B);
+          Value *SrcValue = Constant::getNullValue(PointerType::get(
+              EltVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
+          SDValue Ld = DAG.getLoad(
+              EltVT, dl, Root, Addr, MachinePointerInfo(SrcValue), false, false,
+              false,
+              TD->getABITypeAlignment(EltVT.getTypeForEVT(F->getContext())));
+          Offset += EltVT.getStoreSizeInBits() / 8;
+          InVals.push_back(Ld);
+        }
+        continue;
+      }
+
       // A plain scalar.
       if (isABI || isKernel) {
         // If ABI, load from the param symbol
         SDValue Arg = getParamSymbol(DAG, idx);
         // Conjure up a value that we can get the address space from.
         // FIXME: Using a constant here is a hack.
-        Value *srcValue = Constant::getNullValue(PointerType::get(
-                              ObjectVT.getTypeForEVT(F->getContext()),
-                              llvm::ADDRESS_SPACE_PARAM));
-        SDValue p = DAG.getLoad(ObjectVT, dl, Root, Arg,
-                                MachinePointerInfo(srcValue), false, false,
-                                false,
-                                TD->getABITypeAlignment(ObjectVT.getTypeForEVT(
-                                  F->getContext())));
+        Value *srcValue = Constant::getNullValue(
+            PointerType::get(ObjectVT.getTypeForEVT(F->getContext()),
+                             llvm::ADDRESS_SPACE_PARAM));
+        SDValue p = DAG.getLoad(
+            ObjectVT, dl, Root, Arg, MachinePointerInfo(srcValue), false, false,
+            false,
+            TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
         if (p.getNode())
-          DAG.AssignOrdering(p.getNode(), idx+1);
+          DAG.AssignOrdering(p.getNode(), idx + 1);
         InVals.push_back(p);
-      }
-      else {
+      } else {
         // If no ABI, just move the param symbol
         SDValue Arg = getParamSymbol(DAG, idx, ObjectVT);
         SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
         if (p.getNode())
-          DAG.AssignOrdering(p.getNode(), idx+1);
+          DAG.AssignOrdering(p.getNode(), idx + 1);
         InVals.push_back(p);
       }
       continue;
@@ -1130,47 +1169,49 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain,
       SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
       SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
       if (p.getNode())
-        DAG.AssignOrdering(p.getNode(), idx+1);
+        DAG.AssignOrdering(p.getNode(), idx + 1);
       if (isKernel)
         InVals.push_back(p);
       else {
-        SDValue p2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT,
-                    DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32),
-                                 p);
+        SDValue p2 = DAG.getNode(
+            ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT,
+            DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32), p);
         InVals.push_back(p2);
       }
     } else {
       // Have to move a set of param symbols to registers and
       // store them locally and return the local pointer in InVals
       const PointerType *elemPtrType = dyn_cast<PointerType>(argTypes[i]);
-      assert(elemPtrType &&
-             "Byval parameter should be a pointer type");
+      assert(elemPtrType && "Byval parameter should be a pointer type");
       Type *elemType = elemPtrType->getElementType();
       // Compute the constituent parts
       SmallVector<EVT, 16> vtparts;
       SmallVector<uint64_t, 16> offsets;
       ComputeValueVTs(*this, elemType, vtparts, &offsets, 0);
       unsigned totalsize = 0;
-      for (unsigned j=0, je=vtparts.size(); j!=je; ++j)
+      for (unsigned j = 0, je = vtparts.size(); j != je; ++j)
         totalsize += vtparts[j].getStoreSizeInBits();
-      SDValue localcopy =  DAG.getFrameIndex(MF.getFrameInfo()->
-                                      CreateStackObject(totalsize/8, 16, false),
-                                             getPointerTy());
+      SDValue localcopy = DAG.getFrameIndex(
+          MF.getFrameInfo()->CreateStackObject(totalsize / 8, 16, false),
+          getPointerTy());
       unsigned sizesofar = 0;
       std::vector<SDValue> theChains;
-      for (unsigned j=0, je=vtparts.size(); j!=je; ++j) {
+      for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
         unsigned numElems = 1;
-        if (vtparts[j].isVector()) numElems = vtparts[j].getVectorNumElements();
-        for (unsigned k=0, ke=numElems; k!=ke; ++k) {
+        if (vtparts[j].isVector())
+          numElems = vtparts[j].getVectorNumElements();
+        for (unsigned k = 0, ke = numElems; k != ke; ++k) {
           EVT tmpvt = vtparts[j];
-          if (tmpvt.isVector()) tmpvt = tmpvt.getVectorElementType();
+          if (tmpvt.isVector())
+            tmpvt = tmpvt.getVectorElementType();
           SDValue arg = DAG.getNode(NVPTXISD::MoveParam, dl, tmpvt,
                                     getParamSymbol(DAG, idx, tmpvt));
-          SDValue addr = DAG.getNode(ISD::ADD, dl, getPointerTy(), localcopy,
-                                    DAG.getConstant(sizesofar, getPointerTy()));
-          theChains.push_back(DAG.getStore(Chain, dl, arg, addr,
-                                        MachinePointerInfo(), false, false, 0));
-          sizesofar += tmpvt.getStoreSizeInBits()/8;
+          SDValue addr =
+              DAG.getNode(ISD::ADD, dl, getPointerTy(), localcopy,
+                          DAG.getConstant(sizesofar, getPointerTy()));
+          theChains.push_back(DAG.getStore(
+              Chain, dl, arg, addr, MachinePointerInfo(), false, false, 0));
+          sizesofar += tmpvt.getStoreSizeInBits() / 8;
           ++idx;
         }
       }
@@ -1190,43 +1231,42 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain,
   //}
 
   if (!OutChains.empty())
-    DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
-                            &OutChains[0], OutChains.size()));
+    DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &OutChains[0],
+                            OutChains.size()));
 
   return Chain;
 }
 
-SDValue
-NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
-                                 bool isVarArg,
-                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
-                                 const SmallVectorImpl<SDValue> &OutVals,
-                                 DebugLoc dl, SelectionDAG &DAG) const {
+SDValue NVPTXTargetLowering::LowerReturn(
+    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+    const SmallVectorImpl<ISD::OutputArg> &Outs,
+    const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl,
+    SelectionDAG &DAG) const {
 
   bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
 
   unsigned sizesofar = 0;
   unsigned idx = 0;
-  for (unsigned i=0, e=Outs.size(); i!=e; ++i) {
+  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
     SDValue theVal = OutVals[i];
     EVT theValType = theVal.getValueType();
     unsigned numElems = 1;
-    if (theValType.isVector()) numElems = theValType.getVectorNumElements();
-    for (unsigned j=0,je=numElems; j!=je; ++j) {
+    if (theValType.isVector())
+      numElems = theValType.getVectorNumElements();
+    for (unsigned j = 0, je = numElems; j != je; ++j) {
       SDValue tmpval = theVal;
       if (theValType.isVector())
         tmpval = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
-                             theValType.getVectorElementType(),
-                             tmpval, DAG.getIntPtrConstant(j));
-      Chain = DAG.getNode(isABI ? NVPTXISD::StoreRetval :NVPTXISD::MoveToRetval,
-          dl, MVT::Other,
-          Chain,
-          DAG.getConstant(isABI ? sizesofar : idx, MVT::i32),
+                             theValType.getVectorElementType(), tmpval,
+                             DAG.getIntPtrConstant(j));
+      Chain = DAG.getNode(
+          isABI ? NVPTXISD::StoreRetval : NVPTXISD::MoveToRetval, dl,
+          MVT::Other, Chain, DAG.getConstant(isABI ? sizesofar : idx, MVT::i32),
           tmpval);
       if (theValType.isVector())
-        sizesofar += theValType.getVectorElementType().getStoreSizeInBits()/8;
+        sizesofar += theValType.getVectorElementType().getStoreSizeInBits() / 8;
       else
-        sizesofar += theValType.getStoreSizeInBits()/8;
+        sizesofar += theValType.getStoreSizeInBits() / 8;
       ++idx;
     }
   }
@@ -1234,12 +1274,9 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
   return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain);
 }
 
-void
-NVPTXTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
-                                                  std::string &Constraint,
-                                                  std::vector<SDValue> &Ops,
-                                                  SelectionDAG &DAG) const
-{
+void NVPTXTargetLowering::LowerAsmOperandForConstraint(
+    SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
+    SelectionDAG &DAG) const {
   if (Constraint.length() > 1)
     return;
   else
@@ -1249,8 +1286,7 @@ NVPTXTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
 // NVPTX suuport vector of legal types of any length in Intrinsics because the
 // NVPTX specific type legalizer
 // will legalize them to the PTX supported length.
-bool
-NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const {
+bool NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const {
   if (isTypeLegal(VT))
     return true;
   if (VT.isVector()) {
@@ -1261,15 +1297,13 @@ NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const {
   return false;
 }
 
-
 // llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as
 // TgtMemIntrinsic
 // because we need the information that is only available in the "Value" type
 // of destination
 // pointer. In particular, the address space information.
-bool
-NVPTXTargetLowering::getTgtMemIntrinsic(IntrinsicInfo& Info, const CallInst &I,
-                                        unsigned Intrinsic) const {
+bool NVPTXTargetLowering::getTgtMemIntrinsic(
+    IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const {
   switch (Intrinsic) {
   default:
     return false;
@@ -1325,9 +1359,8 @@ NVPTXTargetLowering::getTgtMemIntrinsic(IntrinsicInfo& Info, const CallInst &I,
 /// Used to guide target specific optimizations, like loop strength reduction
 /// (LoopStrengthReduce.cpp) and memory optimization for address mode
 /// (CodeGenPrepare.cpp)
-bool
-NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM,
-                                           Type *Ty) const {
+bool NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+                                                Type *Ty) const {
 
   // AddrMode - This represents an addressing mode of:
   //    BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
@@ -1345,10 +1378,10 @@ NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM,
   }
 
   switch (AM.Scale) {
-  case 0:  // "r", "r+i" or "i" is allowed
+  case 0: // "r", "r+i" or "i" is allowed
     break;
   case 1:
-    if (AM.HasBaseReg)  // "r+r+i" or "r+r" is not allowed.
+    if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed.
       return false;
     // Otherwise we have r+i.
     break;
@@ -1385,8 +1418,7 @@ NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const {
   return TargetLowering::getConstraintType(Constraint);
 }
 
-
-std::pair<unsigned, const TargetRegisterClass*>
+std::pair<unsigned, const TargetRegisterClass *>
 NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                   EVT VT) const {
   if (Constraint.size() == 1) {
@@ -1409,8 +1441,6 @@ NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
 }
 
-
-
 /// getFunctionAlignment - Return the Log2 alignment of this function.
 unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const {
   return 4;
@@ -1418,7 +1448,7 @@ unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const {
 
 /// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads.
 static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
-                              SmallVectorImpl<SDValue>& Results) {
+                              SmallVectorImpl<SDValue> &Results) {
   EVT ResVT = N->getValueType(0);
   DebugLoc DL = N->getDebugLoc();
 
@@ -1429,7 +1459,8 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
   // but I'm leaving that as a TODO for now.
   assert(ResVT.isSimple() && "Can only handle simple types");
   switch (ResVT.getSimpleVT().SimpleTy) {
-  default: return;
+  default:
+    return;
   case MVT::v2i8:
   case MVT::v2i16:
   case MVT::v2i32:
@@ -1460,7 +1491,8 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
   SDVTList LdResVTs;
 
   switch (NumElts) {
-  default:  return;
+  default:
+    return;
   case 2:
     Opcode = NVPTXISD::LoadV2;
     LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
@@ -1500,14 +1532,14 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
 
   SDValue LoadChain = NewLD.getValue(NumElts);
 
-  SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts);
+  SDValue BuildVec =
+      DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts);
 
   Results.push_back(BuildVec);
   Results.push_back(LoadChain);
 }
 
-static void ReplaceINTRINSIC_W_CHAIN(SDNode *N,
-                                     SelectionDAG &DAG,
+static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &Results) {
   SDValue Chain = N->getOperand(0);
   SDValue Intrin = N->getOperand(1);
@@ -1515,8 +1547,9 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N,
 
   // Get the intrinsic ID
   unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue();
-  switch(IntrinNo) {
-  default: return;
+  switch (IntrinNo) {
+  default:
+    return;
   case Intrinsic::nvvm_ldg_global_i:
   case Intrinsic::nvvm_ldg_global_f:
   case Intrinsic::nvvm_ldg_global_p:
@@ -1544,10 +1577,12 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N,
       SDVTList LdResVTs;
 
       switch (NumElts) {
-      default:  return;
+      default:
+        return;
       case 2:
-        switch(IntrinNo) {
-        default: return;
+        switch (IntrinNo) {
+        default:
+          return;
         case Intrinsic::nvvm_ldg_global_i:
         case Intrinsic::nvvm_ldg_global_f:
         case Intrinsic::nvvm_ldg_global_p:
@@ -1562,8 +1597,9 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N,
         LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
         break;
       case 4: {
-        switch(IntrinNo) {
-        default: return;
+        switch (IntrinNo) {
+        default:
+          return;
         case Intrinsic::nvvm_ldg_global_i:
         case Intrinsic::nvvm_ldg_global_f:
         case Intrinsic::nvvm_ldg_global_p:
@@ -1586,29 +1622,31 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N,
       // Copy regular operands
 
       OtherOps.push_back(Chain); // Chain
-      // Skip operand 1 (intrinsic ID)
-      // Others
+                                 // Skip operand 1 (intrinsic ID)
+                                 // Others
       for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i)
         OtherOps.push_back(N->getOperand(i));
 
       MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
 
-      SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, &OtherOps[0],
-                                              OtherOps.size(), MemSD->getMemoryVT(),
-                                              MemSD->getMemOperand());
+      SDValue NewLD = DAG.getMemIntrinsicNode(
+          Opcode, DL, LdResVTs, &OtherOps[0], OtherOps.size(),
+          MemSD->getMemoryVT(), MemSD->getMemOperand());
 
       SmallVector<SDValue, 4> ScalarRes;
 
       for (unsigned i = 0; i < NumElts; ++i) {
         SDValue Res = NewLD.getValue(i);
         if (NeedTrunc)
-          Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
+          Res =
+              DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
         ScalarRes.push_back(Res);
       }
 
       SDValue LoadChain = NewLD.getValue(NumElts);
 
-      SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts);
+      SDValue BuildVec =
+          DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts);
 
       Results.push_back(BuildVec);
       Results.push_back(LoadChain);
@@ -1629,10 +1667,9 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N,
 
       // We make sure the memory type is i8, which will be used during isel
       // to select the proper instruction.
-      SDValue NewLD = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL,
-                                              LdResVTs, &Ops[0],
-                                              Ops.size(), MVT::i8,
-                                              MemSD->getMemOperand());
+      SDValue NewLD =
+          DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, &Ops[0],
+                                  Ops.size(), MVT::i8, MemSD->getMemOperand());
 
       Results.push_back(NewLD.getValue(0));
       Results.push_back(NewLD.getValue(1));
@@ -1641,11 +1678,11 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N,
   }
 }
 
-void NVPTXTargetLowering::ReplaceNodeResults(SDNode *N,
-                                             SmallVectorImpl<SDValue> &Results,
-                                             SelectionDAG &DAG) const {
+void NVPTXTargetLowering::ReplaceNodeResults(
+    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
   switch (N->getOpcode()) {
-  default: report_fatal_error("Unhandled custom legalization");
+  default:
+    report_fatal_error("Unhandled custom legalization");
   case ISD::LOAD:
     ReplaceLoadVector(N, DAG, Results);
     return;
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h
index 14afc148cb..3cd49d38af 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -87,7 +87,7 @@ public:
 
   bool isTypeSupportedInIntrinsic(MVT VT) const;
 
-  bool getTgtMemIntrinsic(IntrinsicInfo& Info, const CallInst &I,
+  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                           unsigned Intrinsic) const;
 
   /// isLegalAddressingMode - Return true if the addressing mode represented
@@ -107,14 +107,13 @@ public:
   }
 
   ConstraintType getConstraintType(const std::string &Constraint) const;
-  std::pair<unsigned, const TargetRegisterClass*>
+  std::pair<unsigned, const TargetRegisterClass *>
   getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
 
-  virtual SDValue
-  LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
-                       const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl,
-                       SelectionDAG &DAG,
-                       SmallVectorImpl<SDValue> &InVals) const;
+  virtual SDValue LowerFormalArguments(
+      SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+      const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG,
+      SmallVectorImpl<SDValue> &InVals) const;
 
   virtual SDValue
   LowerCall(CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const;
@@ -136,17 +135,15 @@ public:
   NVPTXTargetMachine *nvTM;
 
   // PTX always uses 32-bit shift amounts
-  virtual MVT getScalarShiftAmountTy(EVT LHSTy) const {
-    return MVT::i32;
-  }
+  virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
 
   virtual bool shouldSplitVectorElementType(EVT VT) const;
 
 private:
-  const NVPTXSubtarget &nvptxSubtarget;  // cache the subtarget here
+  const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here
 
-  SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx, EVT =
-                         MVT::i32) const;
+  SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx,
+                     EVT = MVT::i32) const;
   SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT = MVT::i32) const;
   SDValue getParamHelpSymbol(SelectionDAG &DAG, int idx);
 
@@ -159,8 +156,7 @@ private:
   SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const;
 
-  virtual void ReplaceNodeResults(SDNode *N,
-                                  SmallVectorImpl<SDValue> &Results,
+  virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                                   SelectionDAG &DAG) const;
 };
 } // namespace llvm
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index 9e73d80c28..33a63c26f4 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -23,61 +23,55 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include <cstdio>
 
-
 using namespace llvm;
 
 // FIXME: Add the subtarget support on this constructor.
 NVPTXInstrInfo::NVPTXInstrInfo(NVPTXTargetMachine &tm)
-: NVPTXGenInstrInfo(),
-  TM(tm),
-  RegInfo(*this, *TM.getSubtargetImpl()) {}
-
+    : NVPTXGenInstrInfo(), TM(tm), RegInfo(*this, *TM.getSubtargetImpl()) {}
 
-void NVPTXInstrInfo::copyPhysReg (MachineBasicBlock &MBB,
-                                  MachineBasicBlock::iterator I, DebugLoc DL,
-                                  unsigned DestReg, unsigned SrcReg,
-                                  bool KillSrc) const {
+void NVPTXInstrInfo::copyPhysReg(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL,
+    unsigned DestReg, unsigned SrcReg, bool KillSrc) const {
   if (NVPTX::Int32RegsRegClass.contains(DestReg) &&
       NVPTX::Int32RegsRegClass.contains(SrcReg))
     BuildMI(MBB, I, DL, get(NVPTX::IMOV32rr), DestReg)
-    .addReg(SrcReg, getKillRegState(KillSrc));
+        .addReg(SrcReg, getKillRegState(KillSrc));
   else if (NVPTX::Int8RegsRegClass.contains(DestReg) &&
-      NVPTX::Int8RegsRegClass.contains(SrcReg))
+           NVPTX::Int8RegsRegClass.contains(SrcReg))
     BuildMI(MBB, I, DL, get(NVPTX::IMOV8rr), DestReg)
-    .addReg(SrcReg, getKillRegState(KillSrc));
+        .addReg(SrcReg, getKillRegState(KillSrc));
   else if (NVPTX::Int1RegsRegClass.contains(DestReg) &&
-      NVPTX::Int1RegsRegClass.contains(SrcReg))
+           NVPTX::Int1RegsRegClass.contains(SrcReg))
     BuildMI(MBB, I, DL, get(NVPTX::IMOV1rr), DestReg)
-    .addReg(SrcReg, getKillRegState(KillSrc));
+        .addReg(SrcReg, getKillRegState(KillSrc));
   else if (NVPTX::Float32RegsRegClass.contains(DestReg) &&
-      NVPTX::Float32RegsRegClass.contains(SrcReg))
+           NVPTX::Float32RegsRegClass.contains(SrcReg))
     BuildMI(MBB, I, DL, get(NVPTX::FMOV32rr), DestReg)
-    .addReg(SrcReg, getKillRegState(KillSrc));
+        .addReg(SrcReg, getKillRegState(KillSrc));
   else if (NVPTX::Int16RegsRegClass.contains(DestReg) &&
-      NVPTX::Int16RegsRegClass.contains(SrcReg))
+           NVPTX::Int16RegsRegClass.contains(SrcReg))
     BuildMI(MBB, I, DL, get(NVPTX::IMOV16rr), DestReg)
-    .addReg(SrcReg, getKillRegState(KillSrc));
+        .addReg(SrcReg, getKillRegState(KillSrc));
   else if (NVPTX::Int64RegsRegClass.contains(DestReg) &&
-      NVPTX::Int64RegsRegClass.contains(SrcReg))
+           NVPTX::Int64RegsRegClass.contains(SrcReg))
     BuildMI(MBB, I, DL, get(NVPTX::IMOV64rr), DestReg)
-    .addReg(SrcReg, getKillRegState(KillSrc));
+        .addReg(SrcReg, getKillRegState(KillSrc));
   else if (NVPTX::Float64RegsRegClass.contains(DestReg) &&
-      NVPTX::Float64RegsRegClass.contains(SrcReg))
+           NVPTX::Float64RegsRegClass.contains(SrcReg))
     BuildMI(MBB, I, DL, get(NVPTX::FMOV64rr), DestReg)
-    .addReg(SrcReg, getKillRegState(KillSrc));
+        .addReg(SrcReg, getKillRegState(KillSrc));
   else {
     llvm_unreachable("Don't know how to copy a register");
   }
 }
 
-bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI,
-                                 unsigned &SrcReg,
+bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI, unsigned &SrcReg,
                                  unsigned &DestReg) const {
   // Look for the appropriate part of TSFlags
   bool isMove = false;
 
-  unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::SimpleMoveMask) >>
-      NVPTX::SimpleMoveShift;
+  unsigned TSFlags =
+      (MI.getDesc().TSFlags & NVPTX::SimpleMoveMask) >> NVPTX::SimpleMoveShift;
   isMove = (TSFlags == 1);
 
   if (isMove) {
@@ -94,10 +88,10 @@ bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI,
   return false;
 }
 
-bool  NVPTXInstrInfo::isReadSpecialReg(MachineInstr &MI) const
-{
+bool NVPTXInstrInfo::isReadSpecialReg(MachineInstr &MI) const {
   switch (MI.getOpcode()) {
-  default: return false;
+  default:
+    return false;
   case NVPTX::INT_PTX_SREG_NTID_X:
   case NVPTX::INT_PTX_SREG_NTID_Y:
   case NVPTX::INT_PTX_SREG_NTID_Z:
@@ -115,12 +109,11 @@ bool  NVPTXInstrInfo::isReadSpecialReg(MachineInstr &MI) const
   }
 }
 
-
 bool NVPTXInstrInfo::isLoadInstr(const MachineInstr &MI,
                                  unsigned &AddrSpace) const {
   bool isLoad = false;
-  unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::isLoadMask) >>
-      NVPTX::isLoadShift;
+  unsigned TSFlags =
+      (MI.getDesc().TSFlags & NVPTX::isLoadMask) >> NVPTX::isLoadShift;
   isLoad = (TSFlags == 1);
   if (isLoad)
     AddrSpace = getLdStCodeAddrSpace(MI);
@@ -130,15 +123,14 @@ bool NVPTXInstrInfo::isLoadInstr(const MachineInstr &MI,
 bool NVPTXInstrInfo::isStoreInstr(const MachineInstr &MI,
                                   unsigned &AddrSpace) const {
   bool isStore = false;
-  unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::isStoreMask) >>
-      NVPTX::isStoreShift;
+  unsigned TSFlags =
+      (MI.getDesc().TSFlags & NVPTX::isStoreMask) >> NVPTX::isStoreShift;
   isStore = (TSFlags == 1);
   if (isStore)
     AddrSpace = getLdStCodeAddrSpace(MI);
   return isStore;
 }
 
-
 bool NVPTXInstrInfo::CanTailMerge(const MachineInstr *MI) const {
   unsigned addrspace = 0;
   if (MI->getOpcode() == NVPTX::INT_CUDA_SYNCTHREADS)
@@ -152,7 +144,6 @@ bool NVPTXInstrInfo::CanTailMerge(const MachineInstr *MI) const {
   return true;
 }
 
-
 /// AnalyzeBranch - Analyze the branching code at the end of MBB, returning
 /// true if it cannot be understood (e.g. it's a switch dispatch or isn't
 /// implemented for a target).  Upon success, this returns false and returns
@@ -176,11 +167,9 @@ bool NVPTXInstrInfo::CanTailMerge(const MachineInstr *MI) const {
 /// Note that RemoveBranch and InsertBranch must be implemented to support
 /// cases where this method returns success.
 ///
-bool NVPTXInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
-                                   MachineBasicBlock *&TBB,
-                                   MachineBasicBlock *&FBB,
-                                   SmallVectorImpl<MachineOperand> &Cond,
-                                   bool AllowModify) const {
+bool NVPTXInstrInfo::AnalyzeBranch(
+    MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
+    SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const {
   // If the block has no terminators, it just falls into the block after it.
   MachineBasicBlock::iterator I = MBB.end();
   if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
@@ -208,14 +197,13 @@ bool NVPTXInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
   MachineInstr *SecondLastInst = I;
 
   // If there are three terminators, we don't know what sort of block this is.
-  if (SecondLastInst && I != MBB.begin() &&
-      isUnpredicatedTerminator(--I))
+  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
     return true;
 
   // If the block ends with NVPTX::GOTO and NVPTX:CBranch, handle it.
   if (SecondLastInst->getOpcode() == NVPTX::CBranch &&
       LastInst->getOpcode() == NVPTX::GOTO) {
-    TBB =  SecondLastInst->getOperand(1).getMBB();
+    TBB = SecondLastInst->getOperand(1).getMBB();
     Cond.push_back(SecondLastInst->getOperand(0));
     FBB = LastInst->getOperand(0).getMBB();
     return false;
@@ -238,7 +226,8 @@ bool NVPTXInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
 
 unsigned NVPTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
   MachineBasicBlock::iterator I = MBB.end();
-  if (I == MBB.begin()) return 0;
+  if (I == MBB.begin())
+    return 0;
   --I;
   if (I->getOpcode() != NVPTX::GOTO && I->getOpcode() != NVPTX::CBranch)
     return 0;
@@ -248,7 +237,8 @@ unsigned NVPTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
 
   I = MBB.end();
 
-  if (I == MBB.begin()) return 1;
+  if (I == MBB.begin())
+    return 1;
   --I;
   if (I->getOpcode() != NVPTX::CBranch)
     return 1;
@@ -258,11 +248,9 @@ unsigned NVPTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
   return 2;
 }
 
-unsigned
-NVPTXInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
-                             MachineBasicBlock *FBB,
-                             const SmallVectorImpl<MachineOperand> &Cond,
-                             DebugLoc DL) const {
+unsigned NVPTXInstrInfo::InsertBranch(
+    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+    const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const {
   // Shouldn't be a fall through.
   assert(TBB && "InsertBranch must not be told to insert a fallthrough");
   assert((Cond.size() == 1 || Cond.size() == 0) &&
@@ -270,17 +258,16 @@ NVPTXInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
 
   // One-way branch.
   if (FBB == 0) {
-    if (Cond.empty())   // Unconditional branch
+    if (Cond.empty()) // Unconditional branch
       BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(TBB);
-    else                // Conditional branch
-      BuildMI(&MBB, DL, get(NVPTX::CBranch))
-      .addReg(Cond[0].getReg()).addMBB(TBB);
+    else // Conditional branch
+      BuildMI(&MBB, DL, get(NVPTX::CBranch)).addReg(Cond[0].getReg())
+          .addMBB(TBB);
     return 1;
   }
 
   // Two-way Conditional Branch.
-  BuildMI(&MBB, DL, get(NVPTX::CBranch))
-  .addReg(Cond[0].getReg()).addMBB(TBB);
+  BuildMI(&MBB, DL, get(NVPTX::CBranch)).addReg(Cond[0].getReg()).addMBB(TBB);
   BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(FBB);
   return 2;
 }
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.h b/lib/Target/NVPTX/NVPTXInstrInfo.h
index 7b8e218b05..b1972e9b72 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.h
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.h
@@ -23,8 +23,7 @@
 
 namespace llvm {
 
-class NVPTXInstrInfo : public NVPTXGenInstrInfo
-{
+class NVPTXInstrInfo : public NVPTXGenInstrInfo {
   NVPTXTargetMachine &TM;
   const NVPTXRegisterInfo RegInfo;
 public:
@@ -50,30 +49,26 @@ public:
    *                               const TargetRegisterClass *RC) const;
    */
 
-  virtual void copyPhysReg(MachineBasicBlock &MBB,
-                           MachineBasicBlock::iterator I, DebugLoc DL,
-                           unsigned DestReg, unsigned SrcReg,
-                           bool KillSrc) const ;
-  virtual bool isMoveInstr(const MachineInstr &MI,
-                           unsigned &SrcReg,
+  virtual void copyPhysReg(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL,
+      unsigned DestReg, unsigned SrcReg, bool KillSrc) const;
+  virtual bool isMoveInstr(const MachineInstr &MI, unsigned &SrcReg,
                            unsigned &DestReg) const;
   bool isLoadInstr(const MachineInstr &MI, unsigned &AddrSpace) const;
   bool isStoreInstr(const MachineInstr &MI, unsigned &AddrSpace) const;
   bool isReadSpecialReg(MachineInstr &MI) const;
 
-  virtual bool CanTailMerge(const MachineInstr *MI) const ;
+  virtual bool CanTailMerge(const MachineInstr *MI) const;
   // Branch analysis.
-  virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
-                             MachineBasicBlock *&FBB,
-                             SmallVectorImpl<MachineOperand> &Cond,
-                             bool AllowModify) const;
+  virtual bool AnalyzeBranch(
+      MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
+      SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const;
   virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
-  virtual unsigned InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
-                                MachineBasicBlock *FBB,
-                                const SmallVectorImpl<MachineOperand> &Cond,
-                                DebugLoc DL) const;
+  virtual unsigned InsertBranch(
+      MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+      const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const;
   unsigned getLdStCodeAddrSpace(const MachineInstr &MI) const {
-    return  MI.getOperand(2).getImm();
+    return MI.getOperand(2).getImm();
   }
 
 };
diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
index f7fa7aa61d..7c257b4c6a 100644
--- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
+++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
@@ -25,18 +25,15 @@
 
 using namespace llvm;
 
-namespace llvm {
-FunctionPass *createLowerAggrCopies();
-}
+namespace llvm { FunctionPass *createLowerAggrCopies(); }
 
 char NVPTXLowerAggrCopies::ID = 0;
 
 // Lower MemTransferInst or load-store pair to loop
-static void convertTransferToLoop(Instruction *splitAt, Value *srcAddr,
-                                  Value *dstAddr, Value *len,
-                                  //unsigned numLoads,
-                                  bool srcVolatile, bool dstVolatile,
-                                  LLVMContext &Context, Function &F) {
+static void convertTransferToLoop(
+    Instruction *splitAt, Value *srcAddr, Value *dstAddr, Value *len,
+    //unsigned numLoads,
+    bool srcVolatile, bool dstVolatile, LLVMContext &Context, Function &F) {
   Type *indType = len->getType();
 
   BasicBlock *origBB = splitAt->getParent();
@@ -48,10 +45,8 @@ static void convertTransferToLoop(Instruction *splitAt, Value *srcAddr,
 
   // srcAddr and dstAddr are expected to be pointer types,
   // so no check is made here.
-  unsigned srcAS =
-      dyn_cast<PointerType>(srcAddr->getType())->getAddressSpace();
-  unsigned dstAS =
-      dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace();
+  unsigned srcAS = dyn_cast<PointerType>(srcAddr->getType())->getAddressSpace();
+  unsigned dstAS = dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace();
 
   // Cast pointers to (char *)
   srcAddr = builder.CreateBitCast(srcAddr, Type::getInt8PtrTy(Context, srcAS));
@@ -86,12 +81,11 @@ static void convertMemSetToLoop(Instruction *splitAt, Value *dstAddr,
   origBB->getTerminator()->setSuccessor(0, loopBB);
   IRBuilder<> builder(origBB, origBB->getTerminator());
 
-  unsigned dstAS =
-      dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace();
+  unsigned dstAS = dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace();
 
   // Cast pointer to the type of value getting stored
-  dstAddr = builder.CreateBitCast(dstAddr,
-                                  PointerType::get(val->getType(), dstAS));
+  dstAddr =
+      builder.CreateBitCast(dstAddr, PointerType::get(val->getType(), dstAS));
 
   IRBuilder<> loop(loopBB);
   PHINode *ind = loop.CreatePHI(len->getType(), 0);
@@ -120,24 +114,26 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
   for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
     //BasicBlock *bb = BI;
     for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
-        ++II) {
-      if (LoadInst * load = dyn_cast<LoadInst>(II)) {
+         ++II) {
+      if (LoadInst *load = dyn_cast<LoadInst>(II)) {
 
-        if (load->hasOneUse() == false) continue;
+        if (load->hasOneUse() == false)
+          continue;
 
-        if (TD->getTypeStoreSize(load->getType()) < MaxAggrCopySize) continue;
+        if (TD->getTypeStoreSize(load->getType()) < MaxAggrCopySize)
+          continue;
 
         User *use = *(load->use_begin());
-        if (StoreInst * store = dyn_cast<StoreInst>(use)) {
+        if (StoreInst *store = dyn_cast<StoreInst>(use)) {
           if (store->getOperand(0) != load) //getValueOperand
-          continue;
+            continue;
           aggrLoads.push_back(load);
         }
-      } else if (MemTransferInst * intr = dyn_cast<MemTransferInst>(II)) {
+      } else if (MemTransferInst *intr = dyn_cast<MemTransferInst>(II)) {
         Value *len = intr->getLength();
         // If the number of elements being copied is greater
         // than MaxAggrCopySize, lower it to a loop
-        if (ConstantInt * len_int = dyn_cast < ConstantInt > (len)) {
+        if (ConstantInt *len_int = dyn_cast<ConstantInt>(len)) {
           if (len_int->getZExtValue() >= MaxAggrCopySize) {
             aggrMemcpys.push_back(intr);
           }
@@ -145,9 +141,9 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
           // turn variable length memcpy/memmov into loop
           aggrMemcpys.push_back(intr);
         }
-      } else if (MemSetInst * memsetintr = dyn_cast<MemSetInst>(II)) {
+      } else if (MemSetInst *memsetintr = dyn_cast<MemSetInst>(II)) {
         Value *len = memsetintr->getLength();
-        if (ConstantInt * len_int = dyn_cast<ConstantInt>(len)) {
+        if (ConstantInt *len_int = dyn_cast<ConstantInt>(len)) {
           if (len_int->getZExtValue() >= MaxAggrCopySize) {
             aggrMemsets.push_back(memsetintr);
           }
@@ -158,8 +154,9 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
       }
     }
   }
-  if ((aggrLoads.size() == 0) && (aggrMemcpys.size() == 0)
-      && (aggrMemsets.size() == 0)) return false;
+  if ((aggrLoads.size() == 0) && (aggrMemcpys.size() == 0) &&
+      (aggrMemsets.size() == 0))
+    return false;
 
   //
   // Do the transformation of an aggr load/copy/set to a loop
diff --git a/lib/Target/NVPTX/NVPTXNumRegisters.h b/lib/Target/NVPTX/NVPTXNumRegisters.h
index b4a4dbce98..a95c16b1e6 100644
--- a/lib/Target/NVPTX/NVPTXNumRegisters.h
+++ b/lib/Target/NVPTX/NVPTXNumRegisters.h
@@ -11,10 +11,6 @@
 #ifndef NVPTX_NUM_REGISTERS_H
 #define NVPTX_NUM_REGISTERS_H
 
-namespace llvm {
-
-const unsigned NVPTXNumRegisters = 396;
-
-}
+namespace llvm { const unsigned NVPTXNumRegisters = 396; }
 
 #endif
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index 350a2c5551..282465359b 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -23,69 +23,54 @@
 #include "llvm/MC/MachineLocation.h"
 #include "llvm/Target/TargetInstrInfo.h"
 
-
 using namespace llvm;
 
-namespace llvm
-{
-std::string getNVPTXRegClassName (TargetRegisterClass const *RC) {
+namespace llvm {
+std::string getNVPTXRegClassName(TargetRegisterClass const *RC) {
   if (RC == &NVPTX::Float32RegsRegClass) {
     return ".f32";
   }
   if (RC == &NVPTX::Float64RegsRegClass) {
     return ".f64";
-  }
-  else if (RC == &NVPTX::Int64RegsRegClass) {
+  } else if (RC == &NVPTX::Int64RegsRegClass) {
     return ".s64";
-  }
-  else if (RC == &NVPTX::Int32RegsRegClass) {
+  } else if (RC == &NVPTX::Int32RegsRegClass) {
     return ".s32";
-  }
-  else if (RC == &NVPTX::Int16RegsRegClass) {
+  } else if (RC == &NVPTX::Int16RegsRegClass) {
     return ".s16";
   }
-  // Int8Regs become 16-bit registers in PTX
-  else if (RC == &NVPTX::Int8RegsRegClass) {
+      // Int8Regs become 16-bit registers in PTX
+      else if (RC == &NVPTX::Int8RegsRegClass) {
     return ".s16";
-  }
-  else if (RC == &NVPTX::Int1RegsRegClass) {
+  } else if (RC == &NVPTX::Int1RegsRegClass) {
     return ".pred";
-  }
-  else if (RC == &NVPTX::SpecialRegsRegClass) {
+  } else if (RC == &NVPTX::SpecialRegsRegClass) {
     return "!Special!";
-  }
-  else {
+  } else {
     return "INTERNAL";
   }
   return "";
 }
 
-std::string getNVPTXRegClassStr (TargetRegisterClass const *RC) {
+std::string getNVPTXRegClassStr(TargetRegisterClass const *RC) {
   if (RC == &NVPTX::Float32RegsRegClass) {
     return "%f";
   }
   if (RC == &NVPTX::Float64RegsRegClass) {
     return "%fd";
-  }
-  else if (RC == &NVPTX::Int64RegsRegClass) {
+  } else if (RC == &NVPTX::Int64RegsRegClass) {
     return "%rd";
-  }
-  else if (RC == &NVPTX::Int32RegsRegClass) {
+  } else if (RC == &NVPTX::Int32RegsRegClass) {
     return "%r";
-  }
-  else if (RC == &NVPTX::Int16RegsRegClass) {
+  } else if (RC == &NVPTX::Int16RegsRegClass) {
     return "%rs";
-  }
-  else if (RC == &NVPTX::Int8RegsRegClass) {
+  } else if (RC == &NVPTX::Int8RegsRegClass) {
     return "%rc";
-  }
-  else if (RC == &NVPTX::Int1RegsRegClass) {
+  } else if (RC == &NVPTX::Int1RegsRegClass) {
     return "%p";
-  }
-  else if (RC == &NVPTX::SpecialRegsRegClass) {
+  } else if (RC == &NVPTX::SpecialRegsRegClass) {
     return "!Special!";
-  }
-  else {
+  } else {
     return "INTERNAL";
   }
   return "";
@@ -94,23 +79,22 @@ std::string getNVPTXRegClassStr (TargetRegisterClass const *RC) {
 
 NVPTXRegisterInfo::NVPTXRegisterInfo(const TargetInstrInfo &tii,
                                      const NVPTXSubtarget &st)
-  : NVPTXGenRegisterInfo(0),
-    Is64Bit(st.is64Bit()) {}
+    : NVPTXGenRegisterInfo(0), Is64Bit(st.is64Bit()) {}
 
 #define GET_REGINFO_TARGET_DESC
 #include "NVPTXGenRegisterInfo.inc"
 
 /// NVPTX Callee Saved Registers
-const uint16_t* NVPTXRegisterInfo::
-getCalleeSavedRegs(const MachineFunction *MF) const {
+const uint16_t *
+NVPTXRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
   static const uint16_t CalleeSavedRegs[] = { 0 };
   return CalleeSavedRegs;
 }
 
 // NVPTX Callee Saved Reg Classes
-const TargetRegisterClass* const*
+const TargetRegisterClass *const *
 NVPTXRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
-  static const TargetRegisterClass * const CalleeSavedRegClasses[] = { 0 };
+  static const TargetRegisterClass *const CalleeSavedRegClasses[] = { 0 };
   return CalleeSavedRegClasses;
 }
 
@@ -119,10 +103,9 @@ BitVector NVPTXRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   return Reserved;
 }
 
-void NVPTXRegisterInfo::
-eliminateFrameIndex(MachineBasicBlock::iterator II,
-                    int SPAdj, unsigned FIOperandNum,
-                    RegScavenger *RS) const {
+void NVPTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+                                            int SPAdj, unsigned FIOperandNum,
+                                            RegScavenger *RS) const {
   assert(SPAdj == 0 && "Unexpected");
 
   MachineInstr &MI = *II;
@@ -130,15 +113,14 @@ eliminateFrameIndex(MachineBasicBlock::iterator II,
 
   MachineFunction &MF = *MI.getParent()->getParent();
   int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
-      MI.getOperand(FIOperandNum+1).getImm();
+               MI.getOperand(FIOperandNum + 1).getImm();
 
   // Using I0 as the frame pointer
   MI.getOperand(FIOperandNum).ChangeToRegister(NVPTX::VRFrame, false);
-  MI.getOperand(FIOperandNum+1).ChangeToImmediate(Offset);
+  MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
 }
 
-int NVPTXRegisterInfo::
-getDwarfRegNum(unsigned RegNum, bool isEH) const {
+int NVPTXRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
   return 0;
 }
 
@@ -146,7 +128,4 @@ unsigned NVPTXRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
   return NVPTX::VRFrame;
 }
 
-unsigned NVPTXRegisterInfo::getRARegister() const {
-  return 0;
-}
-
+unsigned NVPTXRegisterInfo::getRARegister() const { return 0; }
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.h b/lib/Target/NVPTX/NVPTXRegisterInfo.h
index 69f73f213c..d406820661 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.h
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.h
@@ -17,7 +17,6 @@
 #include "ManagedStringPool.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 
-
 #define GET_REGINFO_HEADER
 #include "NVPTXGenRegisterInfo.inc"
 #include "llvm/Target/TargetRegisterInfo.h"
@@ -33,30 +32,28 @@ class NVPTXRegisterInfo : public NVPTXGenRegisterInfo {
 private:
   bool Is64Bit;
   // Hold Strings that can be free'd all together with NVPTXRegisterInfo
-  ManagedStringPool     ManagedStrPool;
+  ManagedStringPool ManagedStrPool;
 
 public:
-  NVPTXRegisterInfo(const TargetInstrInfo &tii,
-                    const NVPTXSubtarget &st);
-
+  NVPTXRegisterInfo(const TargetInstrInfo &tii, const NVPTXSubtarget &st);
 
   //------------------------------------------------------
   // Pure virtual functions from TargetRegisterInfo
   //------------------------------------------------------
 
   // NVPTX callee saved registers
-  virtual const uint16_t*
+  virtual const uint16_t *
   getCalleeSavedRegs(const MachineFunction *MF = 0) const;
 
   // NVPTX callee saved register classes
-  virtual const TargetRegisterClass* const *
+  virtual const TargetRegisterClass *const *
   getCalleeSavedRegClasses(const MachineFunction *MF) const;
 
   virtual BitVector getReservedRegs(const MachineFunction &MF) const;
 
-  virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI,
-                                   int SPAdj, unsigned FIOperandNum,
-                                   RegScavenger *RS=NULL) const;
+  virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
+                                   unsigned FIOperandNum,
+                                   RegScavenger *RS = NULL) const;
 
   virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const;
   virtual unsigned getFrameRegister(const MachineFunction &MF) const;
@@ -74,11 +71,9 @@ public:
 
 };
 
-
-std::string getNVPTXRegClassName (const TargetRegisterClass *RC);
-std::string getNVPTXRegClassStr (const TargetRegisterClass *RC);
+std::string getNVPTXRegClassName(const TargetRegisterClass *RC);
+std::string getNVPTXRegClassStr(const TargetRegisterClass *RC);
 
 } // end namespace llvm
 
-
 #endif
diff --git a/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp b/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp
index babe29500d..83dfe12089 100644
--- a/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp
+++ b/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp
@@ -21,9 +21,7 @@
 
 using namespace llvm;
 
-namespace llvm {
-FunctionPass *createSplitBBatBarPass();
-}
+namespace llvm { FunctionPass *createSplitBBatBarPass(); }
 
 char NVPTXSplitBBatBar::ID = 0;
 
@@ -72,6 +70,4 @@ bool NVPTXSplitBBatBar::runOnFunction(Function &F) {
 // This interface will most likely not be necessary, because this pass will
 // not be invoked by the driver, but will be used as a prerequisite to
 // another pass.
-FunctionPass *llvm::createSplitBBatBarPass() {
-  return new NVPTXSplitBBatBar();
-}
+FunctionPass *llvm::createSplitBBatBarPass() { return new NVPTXSplitBBatBar(); }
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.cpp b/lib/Target/NVPTX/NVPTXSubtarget.cpp
index 7b62cce2c6..2dcd73dcff 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.cpp
+++ b/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -22,27 +22,23 @@ using namespace llvm;
 // Select Driver Interface
 #include "llvm/Support/CommandLine.h"
 namespace {
-cl::opt<NVPTX::DrvInterface>
-DriverInterface(cl::desc("Choose driver interface:"),
-                cl::values(
-                    clEnumValN(NVPTX::NVCL, "drvnvcl", "Nvidia OpenCL driver"),
-                    clEnumValN(NVPTX::CUDA, "drvcuda", "Nvidia CUDA driver"),
-                    clEnumValN(NVPTX::TEST, "drvtest", "Plain Test"),
-                    clEnumValEnd),
-                    cl::init(NVPTX::NVCL));
+cl::opt<NVPTX::DrvInterface> DriverInterface(
+    cl::desc("Choose driver interface:"),
+    cl::values(clEnumValN(NVPTX::NVCL, "drvnvcl", "Nvidia OpenCL driver"),
+               clEnumValN(NVPTX::CUDA, "drvcuda", "Nvidia CUDA driver"),
+               clEnumValN(NVPTX::TEST, "drvtest", "Plain Test"), clEnumValEnd),
+    cl::init(NVPTX::NVCL));
 }
 
 NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU,
                                const std::string &FS, bool is64Bit)
-: NVPTXGenSubtargetInfo(TT, CPU, FS),
-  Is64Bit(is64Bit),
-  PTXVersion(0),
-  SmVersion(10) {
+    : NVPTXGenSubtargetInfo(TT, CPU, FS), Is64Bit(is64Bit), PTXVersion(0),
+      SmVersion(20) {
 
   drvInterface = DriverInterface;
 
   // Provide the default CPU if none
-  std::string defCPU = "sm_10";
+  std::string defCPU = "sm_20";
 
   ParseSubtargetFeatures((CPU.empty() ? defCPU : CPU), FS);
 
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h
index beea77e38d..670077daaa 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -25,7 +25,7 @@
 namespace llvm {
 
 class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
-  
+
   std::string TargetName;
   NVPTX::DrvInterface drvInterface;
   bool Is64Bit;
@@ -61,13 +61,10 @@ public:
   bool hasLDU() const { return SmVersion >= 20; }
   bool hasGenericLdSt() const { return SmVersion >= 20; }
   inline bool hasHWROT32() const { return false; }
-  inline bool hasSWROT32() const {
-    return true;
-  }
-  inline bool hasROT32() const { return hasHWROT32() || hasSWROT32() ; }
+  inline bool hasSWROT32() const { return true; }
+  inline bool hasROT32() const { return hasHWROT32() || hasSWROT32(); }
   inline bool hasROT64() const { return SmVersion >= 20; }
 
-
   bool is64Bit() const { return Is64Bit; }
 
   unsigned int getSmVersion() const { return SmVersion; }
@@ -96,4 +93,4 @@ public:
 
 } // End llvm namespace
 
-#endif  // NVPTXSUBTARGET_H
+#endif // NVPTXSUBTARGET_H
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index cd765fa8cb..67ca6b58e5 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -45,9 +45,11 @@
 #include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/Transforms/Scalar.h"
 
-
 using namespace llvm;
 
+namespace llvm {
+void initializeNVVMReflectPass(PassRegistry&);
+}
 
 extern "C" void LLVMInitializeNVPTXTarget() {
   // Register the target.
@@ -57,52 +59,42 @@ extern "C" void LLVMInitializeNVPTXTarget() {
   RegisterMCAsmInfo<NVPTXMCAsmInfo> A(TheNVPTXTarget32);
   RegisterMCAsmInfo<NVPTXMCAsmInfo> B(TheNVPTXTarget64);
 
+  // FIXME: This pass is really intended to be invoked during IR optimization,
+  // but it's very NVPTX-specific.
+  initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
 }
 
-NVPTXTargetMachine::NVPTXTargetMachine(const Target &T,
-                                       StringRef TT,
-                                       StringRef CPU,
-                                       StringRef FS,
-                                       const TargetOptions& Options,
-                                       Reloc::Model RM,
-                                       CodeModel::Model CM,
-                                       CodeGenOpt::Level OL,
-                                       bool is64bit)
-: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
-  Subtarget(TT, CPU, FS, is64bit),
-  DL(Subtarget.getDataLayout()),
-  InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(*this,is64bit)
-/*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ {
-}
-
-
+NVPTXTargetMachine::NVPTXTargetMachine(
+    const Target &T, StringRef TT, StringRef CPU, StringRef FS,
+    const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
+    CodeGenOpt::Level OL, bool is64bit)
+    : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+      Subtarget(TT, CPU, FS, is64bit), DL(Subtarget.getDataLayout()),
+      InstrInfo(*this), TLInfo(*this), TSInfo(*this),
+      FrameLowering(
+          *this, is64bit) /*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ {}
 
 void NVPTXTargetMachine32::anchor() {}
 
-NVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, StringRef TT,
-                                           StringRef CPU, StringRef FS,
-                                           const TargetOptions &Options,
-                                           Reloc::Model RM, CodeModel::Model CM,
-                                           CodeGenOpt::Level OL)
-: NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {
-}
+NVPTXTargetMachine32::NVPTXTargetMachine32(
+    const Target &T, StringRef TT, StringRef CPU, StringRef FS,
+    const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
+    CodeGenOpt::Level OL)
+    : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
 
 void NVPTXTargetMachine64::anchor() {}
 
-NVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, StringRef TT,
-                                           StringRef CPU, StringRef FS,
-                                           const TargetOptions &Options,
-                                           Reloc::Model RM, CodeModel::Model CM,
-                                           CodeGenOpt::Level OL)
-: NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {
-}
-
+NVPTXTargetMachine64::NVPTXTargetMachine64(
+    const Target &T, StringRef TT, StringRef CPU, StringRef FS,
+    const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
+    CodeGenOpt::Level OL)
+    : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
 
 namespace llvm {
 class NVPTXPassConfig : public TargetPassConfig {
 public:
   NVPTXPassConfig(NVPTXTargetMachine *TM, PassManagerBase &PM)
-  : TargetPassConfig(TM, PM) {}
+      : TargetPassConfig(TM, PM) {}
 
   NVPTXTargetMachine &getNVPTXTargetMachine() const {
     return getTM<NVPTXTargetMachine>();
@@ -126,6 +118,4 @@ bool NVPTXPassConfig::addInstSelector() {
   return false;
 }
 
-bool NVPTXPassConfig::addPreRegAlloc() {
-  return false;
-}
+bool NVPTXPassConfig::addPreRegAlloc() { return false; }
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h
index 1a732be1ad..5fbcf735b4 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -11,7 +11,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-
 #ifndef NVPTX_TARGETMACHINE_H
 #define NVPTX_TARGETMACHINE_H
 
@@ -31,42 +30,40 @@ namespace llvm {
 /// NVPTXTargetMachine
 ///
 class NVPTXTargetMachine : public LLVMTargetMachine {
-  NVPTXSubtarget        Subtarget;
-  const DataLayout      DL;       // Calculates type size & alignment
-  NVPTXInstrInfo        InstrInfo;
-  NVPTXTargetLowering   TLInfo;
-  TargetSelectionDAGInfo   TSInfo;
+  NVPTXSubtarget Subtarget;
+  const DataLayout DL; // Calculates type size & alignment
+  NVPTXInstrInfo InstrInfo;
+  NVPTXTargetLowering TLInfo;
+  TargetSelectionDAGInfo TSInfo;
 
   // NVPTX does not have any call stack frame, but need a NVPTX specific
   // FrameLowering class because TargetFrameLowering is abstract.
-  NVPTXFrameLowering       FrameLowering;
+  NVPTXFrameLowering FrameLowering;
 
   // Hold Strings that can be free'd all together with NVPTXTargetMachine
-  ManagedStringPool     ManagedStrPool;
+  ManagedStringPool ManagedStrPool;
 
   //bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level,
   //                            bool DisableVerify, MCContext *&OutCtx);
 
 public:
-  NVPTXTargetMachine(const Target &T, StringRef TT, StringRef CPU,
-                     StringRef FS, const TargetOptions &Options,
-                     Reloc::Model RM, CodeModel::Model CM,
-                     CodeGenOpt::Level OP,
-                     bool is64bit);
+  NVPTXTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
+                     const TargetOptions &Options, Reloc::Model RM,
+                     CodeModel::Model CM, CodeGenOpt::Level OP, bool is64bit);
 
   virtual const TargetFrameLowering *getFrameLowering() const {
     return &FrameLowering;
   }
-  virtual const NVPTXInstrInfo *getInstrInfo() const  { return &InstrInfo; }
-  virtual const DataLayout *getDataLayout() const     { return &DL;}
-  virtual const NVPTXSubtarget *getSubtargetImpl() const { return &Subtarget;}
+  virtual const NVPTXInstrInfo *getInstrInfo() const { return &InstrInfo; }
+  virtual const DataLayout *getDataLayout() const { return &DL; }
+  virtual const NVPTXSubtarget *getSubtargetImpl() const { return &Subtarget; }
 
   virtual const NVPTXRegisterInfo *getRegisterInfo() const {
     return &(InstrInfo.getRegisterInfo());
   }
 
   virtual NVPTXTargetLowering *getTargetLowering() const {
-    return const_cast<NVPTXTargetLowering*>(&TLInfo);
+    return const_cast<NVPTXTargetLowering *>(&TLInfo);
   }
 
   virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const {
@@ -79,22 +76,19 @@ public:
   //virtual bool addPreRegAlloc(PassManagerBase &, CodeGenOpt::Level);
 
   ManagedStringPool *getManagedStrPool() const {
-    return const_cast<ManagedStringPool*>(&ManagedStrPool);
+    return const_cast<ManagedStringPool *>(&ManagedStrPool);
   }
 
   virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
 
   // Emission of machine code through JITCodeEmitter is not supported.
-  virtual bool addPassesToEmitMachineCode(PassManagerBase &,
-                                          JITCodeEmitter &,
+  virtual bool addPassesToEmitMachineCode(PassManagerBase &, JITCodeEmitter &,
                                           bool = true) {
     return true;
   }
 
   // Emission of machine code through MCJIT is not supported.
-  virtual bool addPassesToEmitMC(PassManagerBase &,
-                                 MCContext *&,
-                                 raw_ostream &,
+  virtual bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_ostream &,
                                  bool = true) {
     return true;
   }
@@ -119,7 +113,6 @@ public:
                        CodeGenOpt::Level OL);
 };
 
-
 } // end namespace llvm
 
 #endif
diff --git a/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/lib/Target/NVPTX/NVPTXTargetObjectFile.h
index b5698a2fc0..6ab0e08ad0 100644
--- a/lib/Target/NVPTX/NVPTXTargetObjectFile.h
+++ b/lib/Target/NVPTX/NVPTXTargetObjectFile.h
@@ -46,45 +46,43 @@ public:
   }
 
   virtual void Initialize(MCContext &ctx, const TargetMachine &TM) {
-    TextSection = new NVPTXSection(MCSection::SV_ELF,
-                                   SectionKind::getText());
-    DataSection = new NVPTXSection(MCSection::SV_ELF,
-                                   SectionKind::getDataRel());
-    BSSSection = new NVPTXSection(MCSection::SV_ELF,
-                                  SectionKind::getBSS());
-    ReadOnlySection = new NVPTXSection(MCSection::SV_ELF,
-                                       SectionKind::getReadOnly());
+    TextSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getText());
+    DataSection =
+        new NVPTXSection(MCSection::SV_ELF, SectionKind::getDataRel());
+    BSSSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getBSS());
+    ReadOnlySection =
+        new NVPTXSection(MCSection::SV_ELF, SectionKind::getReadOnly());
 
-    StaticCtorSection = new NVPTXSection(MCSection::SV_ELF,
-                                         SectionKind::getMetadata());
-    StaticDtorSection = new NVPTXSection(MCSection::SV_ELF,
-                                         SectionKind::getMetadata());
-    LSDASection = new NVPTXSection(MCSection::SV_ELF,
-                                   SectionKind::getMetadata());
-    EHFrameSection = new NVPTXSection(MCSection::SV_ELF,
-                                      SectionKind::getMetadata());
-    DwarfAbbrevSection = new NVPTXSection(MCSection::SV_ELF,
-                                          SectionKind::getMetadata());
-    DwarfInfoSection = new NVPTXSection(MCSection::SV_ELF,
-                                        SectionKind::getMetadata());
-    DwarfLineSection = new NVPTXSection(MCSection::SV_ELF,
-                                        SectionKind::getMetadata());
-    DwarfFrameSection = new NVPTXSection(MCSection::SV_ELF,
-                                         SectionKind::getMetadata());
-    DwarfPubTypesSection = new NVPTXSection(MCSection::SV_ELF,
-                                            SectionKind::getMetadata());
-    DwarfDebugInlineSection = new NVPTXSection(MCSection::SV_ELF,
-                                               SectionKind::getMetadata());
-    DwarfStrSection = new NVPTXSection(MCSection::SV_ELF,
-                                       SectionKind::getMetadata());
-    DwarfLocSection = new NVPTXSection(MCSection::SV_ELF,
-                                       SectionKind::getMetadata());
-    DwarfARangesSection = new NVPTXSection(MCSection::SV_ELF,
-                                           SectionKind::getMetadata());
-    DwarfRangesSection = new NVPTXSection(MCSection::SV_ELF,
-                                          SectionKind::getMetadata());
-    DwarfMacroInfoSection = new NVPTXSection(MCSection::SV_ELF,
-                                             SectionKind::getMetadata());
+    StaticCtorSection =
+        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+    StaticDtorSection =
+        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+    LSDASection =
+        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+    EHFrameSection =
+        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+    DwarfAbbrevSection =
+        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+    DwarfInfoSection =
+        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+    DwarfLineSection =
+        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+    DwarfFrameSection =
+        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+    DwarfPubTypesSection =
+        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+    DwarfDebugInlineSection =
+        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+    DwarfStrSection =
+        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+    DwarfLocSection =
+        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+    DwarfARangesSection =
+        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+    DwarfRangesSection =
+        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+    DwarfMacroInfoSection =
+        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
   }
 
   virtual const MCSection *getSectionForConstant(SectionKind Kind) const {
@@ -93,8 +91,7 @@ public:
 
   virtual const MCSection *
   getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
-                           Mangler *Mang,
-                           const TargetMachine &TM) const {
+                           Mangler *Mang, const TargetMachine &TM) const {
     return DataSection;
   }
 
diff --git a/lib/Target/NVPTX/NVPTXUtilities.cpp b/lib/Target/NVPTX/NVPTXUtilities.cpp
index 1ccc9f7c02..6786eb0224 100644
--- a/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -34,7 +34,6 @@ typedef std::map<const Module *, global_val_annot_t> per_module_annot_t;
 
 ManagedStatic<per_module_annot_t> annotationCache;
 
-
 static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) {
   assert(md && "Invalid mdnode for annotation");
   assert((md->getNumOperands() % 2) == 1 && "Invalid number of operands");
@@ -46,7 +45,7 @@ static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) {
     assert(prop && "Annotation property not a string");
 
     // value
-    ConstantInt *Val = dyn_cast<ConstantInt>(md->getOperand(i+1));
+    ConstantInt *Val = dyn_cast<ConstantInt>(md->getOperand(i + 1));
     assert(Val && "Value operand not a constant int");
 
     std::string keyname = prop->getString().str();
@@ -120,9 +119,9 @@ bool llvm::findAllNVVMAnnotation(const GlobalValue *gv, std::string prop,
 bool llvm::isTexture(const llvm::Value &val) {
   if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
     unsigned annot;
-    if (llvm::findOneNVVMAnnotation(gv,
-                       llvm::PropertyAnnotationNames[llvm::PROPERTY_ISTEXTURE],
-                                   annot)) {
+    if (llvm::findOneNVVMAnnotation(
+            gv, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISTEXTURE],
+            annot)) {
       assert((annot == 1) && "Unexpected annotation on a texture symbol");
       return true;
     }
@@ -133,9 +132,9 @@ bool llvm::isTexture(const llvm::Value &val) {
 bool llvm::isSurface(const llvm::Value &val) {
   if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
     unsigned annot;
-    if (llvm::findOneNVVMAnnotation(gv,
-                       llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSURFACE],
-                                   annot)) {
+    if (llvm::findOneNVVMAnnotation(
+            gv, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSURFACE],
+            annot)) {
       assert((annot == 1) && "Unexpected annotation on a surface symbol");
       return true;
     }
@@ -146,9 +145,9 @@ bool llvm::isSurface(const llvm::Value &val) {
 bool llvm::isSampler(const llvm::Value &val) {
   if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
     unsigned annot;
-    if (llvm::findOneNVVMAnnotation(gv,
-                       llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER],
-                                   annot)) {
+    if (llvm::findOneNVVMAnnotation(
+            gv, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER],
+            annot)) {
       assert((annot == 1) && "Unexpected annotation on a sampler symbol");
       return true;
     }
@@ -156,9 +155,9 @@ bool llvm::isSampler(const llvm::Value &val) {
   if (const Argument *arg = dyn_cast<Argument>(&val)) {
     const Function *func = arg->getParent();
     std::vector<unsigned> annot;
-    if (llvm::findAllNVVMAnnotation(func,
-                       llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER],
-                                   annot)) {
+    if (llvm::findAllNVVMAnnotation(
+            func, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER],
+            annot)) {
       if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end())
         return true;
     }
@@ -171,8 +170,9 @@ bool llvm::isImageReadOnly(const llvm::Value &val) {
     const Function *func = arg->getParent();
     std::vector<unsigned> annot;
     if (llvm::findAllNVVMAnnotation(func,
-          llvm::PropertyAnnotationNames[llvm::PROPERTY_ISREADONLY_IMAGE_PARAM],
-                                   annot)) {
+                                    llvm::PropertyAnnotationNames[
+                                        llvm::PROPERTY_ISREADONLY_IMAGE_PARAM],
+                                    annot)) {
       if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end())
         return true;
     }
@@ -185,8 +185,9 @@ bool llvm::isImageWriteOnly(const llvm::Value &val) {
     const Function *func = arg->getParent();
     std::vector<unsigned> annot;
     if (llvm::findAllNVVMAnnotation(func,
-         llvm::PropertyAnnotationNames[llvm::PROPERTY_ISWRITEONLY_IMAGE_PARAM],
-                                   annot)) {
+                                    llvm::PropertyAnnotationNames[
+                                        llvm::PROPERTY_ISWRITEONLY_IMAGE_PARAM],
+                                    annot)) {
       if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end())
         return true;
     }
@@ -214,52 +215,44 @@ std::string llvm::getSamplerName(const llvm::Value &val) {
 }
 
 bool llvm::getMaxNTIDx(const Function &F, unsigned &x) {
-  return (llvm::findOneNVVMAnnotation(&F,
-                       llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_X],
-                                      x));
+  return (llvm::findOneNVVMAnnotation(
+      &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_X], x));
 }
 
 bool llvm::getMaxNTIDy(const Function &F, unsigned &y) {
-  return (llvm::findOneNVVMAnnotation(&F,
-                       llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Y],
-                                      y));
+  return (llvm::findOneNVVMAnnotation(
+      &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Y], y));
 }
 
 bool llvm::getMaxNTIDz(const Function &F, unsigned &z) {
-  return (llvm::findOneNVVMAnnotation(&F,
-                       llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Z],
-                                      z));
+  return (llvm::findOneNVVMAnnotation(
+      &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Z], z));
 }
 
 bool llvm::getReqNTIDx(const Function &F, unsigned &x) {
-  return (llvm::findOneNVVMAnnotation(&F,
-                       llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_X],
-                                      x));
+  return (llvm::findOneNVVMAnnotation(
+      &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_X], x));
 }
 
 bool llvm::getReqNTIDy(const Function &F, unsigned &y) {
-  return (llvm::findOneNVVMAnnotation(&F,
-                       llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Y],
-                                      y));
+  return (llvm::findOneNVVMAnnotation(
+      &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Y], y));
 }
 
 bool llvm::getReqNTIDz(const Function &F, unsigned &z) {
-  return (llvm::findOneNVVMAnnotation(&F,
-                       llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Z],
-                                      z));
+  return (llvm::findOneNVVMAnnotation(
+      &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Z], z));
 }
 
 bool llvm::getMinCTASm(const Function &F, unsigned &x) {
-  return (llvm::findOneNVVMAnnotation(&F,
-                    llvm::PropertyAnnotationNames[llvm::PROPERTY_MINNCTAPERSM],
-                                      x));
+  return (llvm::findOneNVVMAnnotation(
+      &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MINNCTAPERSM], x));
 }
 
 bool llvm::isKernelFunction(const Function &F) {
   unsigned x = 0;
-  bool retval = llvm::findOneNVVMAnnotation(&F,
-               llvm::PropertyAnnotationNames[llvm::PROPERTY_ISKERNEL_FUNCTION],
-                                            x);
+  bool retval = llvm::findOneNVVMAnnotation(
+      &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISKERNEL_FUNCTION], x);
   if (retval == false) {
     // There is no NVVM metadata, check the calling convention
     if (F.getCallingConv() == llvm::CallingConv::PTX_Kernel)
@@ -267,20 +260,19 @@ bool llvm::isKernelFunction(const Function &F) {
     else
       return false;
   }
-  return (x==1);
+  return (x == 1);
 }
 
 bool llvm::getAlign(const Function &F, unsigned index, unsigned &align) {
   std::vector<unsigned> Vs;
-  bool retval = llvm::findAllNVVMAnnotation(&F,
-                           llvm::PropertyAnnotationNames[llvm::PROPERTY_ALIGN],
-                                            Vs);
+  bool retval = llvm::findAllNVVMAnnotation(
+      &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_ALIGN], Vs);
   if (retval == false)
     return false;
-  for (int i=0, e=Vs.size(); i<e; i++) {
+  for (int i = 0, e = Vs.size(); i < e; i++) {
     unsigned v = Vs[i];
-    if ( (v >> 16) == index ) {
-      align =  v & 0xFFFF;
+    if ((v >> 16) == index) {
+      align = v & 0xFFFF;
       return true;
     }
   }
@@ -289,16 +281,15 @@ bool llvm::getAlign(const Function &F, unsigned index, unsigned &align) {
 
 bool llvm::getAlign(const CallInst &I, unsigned index, unsigned &align) {
   if (MDNode *alignNode = I.getMetadata("callalign")) {
-    for (int i=0, n = alignNode->getNumOperands();
-        i<n; i++) {
+    for (int i = 0, n = alignNode->getNumOperands(); i < n; i++) {
       if (const ConstantInt *CI =
-          dyn_cast<ConstantInt>(alignNode->getOperand(i))) {
+              dyn_cast<ConstantInt>(alignNode->getOperand(i))) {
         unsigned v = CI->getZExtValue();
-        if ( (v>>16) == index ) {
+        if ((v >> 16) == index) {
           align = v & 0xFFFF;
           return true;
         }
-        if ( (v>>16) > index ) {
+        if ((v >> 16) > index) {
           return false;
         }
       }
@@ -337,8 +328,8 @@ bool llvm::isMemorySpaceTransferIntrinsic(Intrinsic::ID id) {
 // consider several special intrinsics in striping pointer casts, and
 // provide an option to ignore GEP indicies for find out the base address only
 // which could be used in simple alias disambigurate.
-const Value *llvm::skipPointerTransfer(const Value *V,
-                                       bool ignore_GEP_indices) {
+const Value *
+llvm::skipPointerTransfer(const Value *V, bool ignore_GEP_indices) {
   V = V->stripPointerCasts();
   while (true) {
     if (const IntrinsicInst *IS = dyn_cast<IntrinsicInst>(V)) {
@@ -360,8 +351,8 @@ const Value *llvm::skipPointerTransfer(const Value *V,
 // - ignore GEP indicies for find out the base address only, and
 // - tracking PHINode
 // which could be used in simple alias disambigurate.
-const Value *llvm::skipPointerTransfer(const Value *V,
-                                       std::set<const Value *> &processed) {
+const Value *
+llvm::skipPointerTransfer(const Value *V, std::set<const Value *> &processed) {
   if (processed.find(V) != processed.end())
     return NULL;
   processed.insert(V);
@@ -406,7 +397,6 @@ const Value *llvm::skipPointerTransfer(const Value *V,
   return V;
 }
 
-
 // The following are some useful utilities for debuggung
 
 BasicBlock *llvm::getParentBlock(Value *v) {
diff --git a/lib/Target/NVPTX/NVPTXUtilities.h b/lib/Target/NVPTX/NVPTXUtilities.h
index 247e09b8bc..a208004297 100644
--- a/lib/Target/NVPTX/NVPTXUtilities.h
+++ b/lib/Target/NVPTX/NVPTXUtilities.h
@@ -23,8 +23,7 @@
 #include <string>
 #include <vector>
 
-namespace llvm
-{
+namespace llvm {
 
 #define NVCL_IMAGE2D_READONLY_FUNCNAME "__is_image2D_readonly"
 #define NVCL_IMAGE3D_READONLY_FUNCNAME "__is_image3D_readonly"
@@ -64,8 +63,7 @@ bool isBarrierIntrinsic(llvm::Intrinsic::ID);
 /// to pass into type construction of CallInst ctors.  This turns a null
 /// terminated list of pointers (or other value types) into a real live vector.
 ///
-template<typename T>
-inline std::vector<T> make_vector(T A, ...) {
+template <typename T> inline std::vector<T> make_vector(T A, ...) {
   va_list Args;
   va_start(Args, A);
   std::vector<T> Result;
@@ -78,8 +76,8 @@ inline std::vector<T> make_vector(T A, ...) {
 
 bool isMemorySpaceTransferIntrinsic(Intrinsic::ID id);
 const Value *skipPointerTransfer(const Value *V, bool ignore_GEP_indices);
-const Value *skipPointerTransfer(const Value *V,
-                                 std::set<const Value *> &processed);
+const Value *
+skipPointerTransfer(const Value *V, std::set<const Value *> &processed);
 BasicBlock *getParentBlock(Value *v);
 Function *getParentFunction(Value *v);
 void dumpBlock(Value *v, char *blockName);
diff --git a/lib/Target/NVPTX/NVPTXutil.cpp b/lib/Target/NVPTX/NVPTXutil.cpp
index 6a0e5328f6..5f074b33a2 100644
--- a/lib/Target/NVPTX/NVPTXutil.cpp
+++ b/lib/Target/NVPTX/NVPTXutil.cpp
@@ -18,8 +18,7 @@ using namespace llvm;
 
 namespace llvm {
 
-bool isParamLoad(const MachineInstr *MI)
-{
+bool isParamLoad(const MachineInstr *MI) {
   if ((MI->getOpcode() != NVPTX::LD_i32_avar) &&
       (MI->getOpcode() != NVPTX::LD_i64_avar))
     return false;
@@ -30,13 +29,11 @@ bool isParamLoad(const MachineInstr *MI)
   return true;
 }
 
-#define DATA_MASK     0x7f
-#define DIGIT_WIDTH   7
-#define MORE_BYTES    0x80
+#define DATA_MASK 0x7f
+#define DIGIT_WIDTH 7
+#define MORE_BYTES 0x80
 
-static int encode_leb128(uint64_t val, int *nbytes,
-                         char *space, int splen)
-{
+static int encode_leb128(uint64_t val, int *nbytes, char *space, int splen) {
   char *a;
   char *end = space + splen;
 
@@ -61,29 +58,30 @@ static int encode_leb128(uint64_t val, int *nbytes,
 #undef DIGIT_WIDTH
 #undef MORE_BYTES
 
-uint64_t encode_leb128(const char *str)
-{
-  union { uint64_t x; char a[8]; } temp64;
+uint64_t encode_leb128(const char *str) {
+  union {
+    uint64_t x;
+    char a[8];
+  } temp64;
 
   temp64.x = 0;
 
-  for (unsigned i=0,e=strlen(str); i!=e; ++i)
-    temp64.a[i] = str[e-1-i];
+  for (unsigned i = 0, e = strlen(str); i != e; ++i)
+    temp64.a[i] = str[e - 1 - i];
 
   char encoded[16];
   int nbytes;
 
   int retval = encode_leb128(temp64.x, &nbytes, encoded, 16);
 
-  (void)retval;
-  assert(retval == 0 &&
-         "Encoding to leb128 failed");
+  (void) retval;
+  assert(retval == 0 && "Encoding to leb128 failed");
 
   assert(nbytes <= 8 &&
          "Cannot support register names with leb128 encoding > 8 bytes");
 
   temp64.x = 0;
-  for (int i=0; i<nbytes; ++i)
+  for (int i = 0; i < nbytes; ++i)
     temp64.a[i] = encoded[i];
 
   return temp64.x;
diff --git a/lib/Target/NVPTX/NVVMReflect.cpp b/lib/Target/NVPTX/NVVMReflect.cpp
new file mode 100644
index 0000000000..3bbd1a13da
--- /dev/null
+++ b/lib/Target/NVPTX/NVVMReflect.cpp
@@ -0,0 +1,193 @@
+//===- NVVMReflect.cpp - NVVM Emulate conditional compilation -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass replaces occurences of __nvvm_reflect("string") with an
+// integer based on -nvvm-reflect-list string=<int> option given to this pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Pass.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_os_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include <map>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#define NVVM_REFLECT_FUNCTION "__nvvm_reflect"
+
+using namespace llvm;
+
+namespace llvm { void initializeNVVMReflectPass(PassRegistry &); }
+
+namespace {
+class LLVM_LIBRARY_VISIBILITY NVVMReflect : public ModulePass {
+private:
+  //std::map<std::string, int> VarMap;
+  StringMap<int> VarMap;
+  typedef std::map<std::string, int>::iterator VarMapIter;
+  Function *reflectFunction;
+
+public:
+  static char ID;
+  NVVMReflect() : ModulePass(ID) {
+    VarMap.clear();
+    reflectFunction = 0;
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); }
+  virtual bool runOnModule(Module &);
+
+  void setVarMap();
+};
+}
+
+static cl::opt<bool>
+NVVMReflectEnabled("nvvm-reflect-enable", cl::init(true),
+                   cl::desc("NVVM reflection, enabled by default"));
+
+char NVVMReflect::ID = 0;
+INITIALIZE_PASS(NVVMReflect, "nvvm-reflect",
+                "Replace occurences of __nvvm_reflect() calls with 0/1", false,
+                false)
+
+static cl::list<std::string>
+ReflectList("nvvm-reflect-list", cl::value_desc("name=0/1"),
+            cl::desc("A list of string=num assignments, where num=0 or 1"),
+            cl::ValueRequired);
+
+/// This function does the same operation as perl's split.
+/// For example, calling this with ("a=1,b=2,c=0", ",") will
+/// return ["a=1", "b=2", "c=0"] in the return std::vector.
+static std::vector<std::string>
+Tokenize(const std::string &str, const std::string &delim) {
+  std::vector<std::string> tokens;
+
+  size_t p0 = 0, p1 = std::string::npos;
+  while (p0 != std::string::npos) {
+    p1 = str.find_first_of(delim, p0);
+    if (p1 != p0) {
+      std::string token = str.substr(p0, p1 - p0);
+      tokens.push_back(token);
+    }
+    p0 = str.find_first_not_of(delim, p1);
+  }
+
+  return tokens;
+}
+
+/// The command line can look as follows :
+/// -R a=1,b=2 -R c=3,d=0 -R e=2
+/// The strings "a=1,b=2", "c=3,d=0", "e=2" are available in the
+/// ReflectList vector. First, each of ReflectList[i] is 'split'
+/// using "," as the delimiter. Then each of this part is split
+/// using "=" as the delimiter.
+void NVVMReflect::setVarMap() {
+  for (unsigned i = 0, e = ReflectList.size(); i != e; ++i) {
+    //    DEBUG(dbgs() << "Option : "  << ReflectList[i] << std::endl);
+    std::vector<std::string> nameValList = Tokenize(ReflectList[i], ",");
+    for (unsigned j = 0, ej = nameValList.size(); j != ej; ++j) {
+      std::vector<std::string> nameValPair = Tokenize(nameValList[j], "=");
+      assert(nameValPair.size() == 2 && "name=val expected");
+      std::stringstream valstream(nameValPair[1]);
+      int val;
+      valstream >> val;
+      assert((!(valstream.fail())) && "integer value expected");
+      VarMap[nameValPair[0]] = val;
+    }
+  }
+}
+
+bool NVVMReflect::runOnModule(Module &M) {
+  if (!NVVMReflectEnabled)
+    return false;
+
+  setVarMap();
+
+  reflectFunction = M.getFunction(NVVM_REFLECT_FUNCTION);
+
+  // If reflect function is not used, then there will be
+  // no entry in the module.
+  if (reflectFunction == 0) {
+    return false;
+  }
+
+  // Validate _reflect function
+  assert(reflectFunction->isDeclaration() &&
+         "_reflect function should not have a body");
+  assert(reflectFunction->getReturnType()->isIntegerTy() &&
+         "_reflect's return type should be integer");
+
+  std::vector<Instruction *> toRemove;
+
+  // Go through the uses of reflectFunction in this Function.
+  // Each of them should a CallInst with a ConstantArray argument.
+  // First validate that. If the c-string corresponding to the
+  // ConstantArray can be found successfully, see if it can be
+  // found in VarMap. If so, replace the uses of CallInst with the
+  // value found in VarMap. If not, replace the use  with value 0.
+  for (Value::use_iterator iter = reflectFunction->use_begin(),
+                           iterEnd = reflectFunction->use_end();
+       iter != iterEnd; ++iter) {
+    assert(isa<CallInst>(*iter) && "Only a call instruction can use _reflect");
+    CallInst *reflect = cast<CallInst>(*iter);
+
+    assert((reflect->getNumOperands() == 2) &&
+           "Only one operand expect for _reflect function");
+    // In cuda, we will have an extra constant-to-generic conversion of
+    // the string.
+    const Value *conv = reflect->getArgOperand(0);
+    assert(isa<CallInst>(conv) && "Expected a const-to-gen conversion");
+    const CallInst *convcall = cast<CallInst>(conv);
+    const Value *str = convcall->getArgOperand(0);
+    assert(isa<ConstantExpr>(str) &&
+           "Format of _reflect function not recognized");
+    const ConstantExpr *gep = cast<ConstantExpr>(str);
+
+    const Value *sym = gep->getOperand(0);
+    assert(isa<Constant>(sym) && "Format of _reflect function not recognized");
+
+    const Constant *symstr = cast<Constant>(sym);
+
+    assert(isa<ConstantDataSequential>(symstr->getOperand(0)) &&
+           "Format of _reflect function not recognized");
+
+    assert(cast<ConstantDataSequential>(symstr->getOperand(0))->isCString() &&
+           "Format of _reflect function not recognized");
+
+    std::string reflectArg =
+        cast<ConstantDataSequential>(symstr->getOperand(0))->getAsString();
+
+    reflectArg = reflectArg.substr(0, reflectArg.size() - 1);
+    //    DEBUG(dbgs() << "Arg of _reflect : " << reflectArg << std::endl);
+
+    int reflectVal = 0; // The default value is 0
+    if (VarMap.find(reflectArg) != VarMap.end()) {
+      reflectVal = VarMap[reflectArg];
+    }
+    reflect->replaceAllUsesWith(
+        ConstantInt::get(reflect->getType(), reflectVal));
+    toRemove.push_back(reflect);
+  }
+  if (toRemove.size() == 0)
+    return false;
+
+  for (unsigned i = 0, e = toRemove.size(); i != e; ++i)
+    toRemove[i]->eraseFromParent();
+  return true;
+}
diff --git a/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp b/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp
index 6c801b875e..cc7d4dc5ec 100644
--- a/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp
+++ b/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp
@@ -17,7 +17,7 @@ Target llvm::TheNVPTXTarget64;
 
 extern "C" void LLVMInitializeNVPTXTargetInfo() {
   RegisterTarget<Triple::nvptx> X(TheNVPTXTarget32, "nvptx",
-    "NVIDIA PTX 32-bit");
+                                  "NVIDIA PTX 32-bit");
   RegisterTarget<Triple::nvptx64> Y(TheNVPTXTarget64, "nvptx64",
-    "NVIDIA PTX 64-bit");
+                                    "NVIDIA PTX 64-bit");
 }
diff --git a/lib/Target/NVPTX/cl_common_defines.h b/lib/Target/NVPTX/cl_common_defines.h
index a7347efd78..45cc0b8b67 100644
--- a/lib/Target/NVPTX/cl_common_defines.h
+++ b/lib/Target/NVPTX/cl_common_defines.h
@@ -24,22 +24,21 @@ enum {
   CLK_LUMINANCE = 0x10B9
 
 #if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1)
-  ,
+      ,
   CLK_Rx = 0x10BA,
   CLK_RGx = 0x10BB,
   CLK_RGBx = 0x10BC
 #endif
 };
 
-
 typedef enum clk_channel_type {
   // valid formats for float return types
-  CLK_SNORM_INT8 = 0x10D0,            // four channel RGBA unorm8
-  CLK_SNORM_INT16 = 0x10D1,           // four channel RGBA unorm16
-  CLK_UNORM_INT8 = 0x10D2,            // four channel RGBA unorm8
-  CLK_UNORM_INT16 = 0x10D3,           // four channel RGBA unorm16
-  CLK_HALF_FLOAT = 0x10DD,            // four channel RGBA half
-  CLK_FLOAT = 0x10DE,                 // four channel RGBA float
+  CLK_SNORM_INT8 = 0x10D0,  // four channel RGBA unorm8
+  CLK_SNORM_INT16 = 0x10D1, // four channel RGBA unorm16
+  CLK_UNORM_INT8 = 0x10D2,  // four channel RGBA unorm8
+  CLK_UNORM_INT16 = 0x10D3, // four channel RGBA unorm16
+  CLK_HALF_FLOAT = 0x10DD,  // four channel RGBA half
+  CLK_FLOAT = 0x10DE,       // four channel RGBA float
 
 #if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1)
   CLK_UNORM_SHORT_565 = 0x10D4,
@@ -48,7 +47,7 @@ typedef enum clk_channel_type {
 #endif
 
   // valid only for integer return types
-  CLK_SIGNED_INT8 =  0x10D7,
+  CLK_SIGNED_INT8 = 0x10D7,
   CLK_SIGNED_INT16 = 0x10D8,
   CLK_SIGNED_INT32 = 0x10D9,
   CLK_UNSIGNED_INT8 = 0x10DA,
@@ -56,70 +55,68 @@ typedef enum clk_channel_type {
   CLK_UNSIGNED_INT32 = 0x10DC,
 
   // CI SPI for CPU
-  __CLK_UNORM_INT8888 ,         // four channel ARGB unorm8
-  __CLK_UNORM_INT8888R,        // four channel BGRA unorm8
+  __CLK_UNORM_INT8888,  // four channel ARGB unorm8
+  __CLK_UNORM_INT8888R, // four channel BGRA unorm8
 
   __CLK_VALID_IMAGE_TYPE_COUNT,
   __CLK_INVALID_IMAGE_TYPE = __CLK_VALID_IMAGE_TYPE_COUNT,
-  __CLK_VALID_IMAGE_TYPE_MASK_BITS = 4,         // number of bits required to
-                                                // represent any image type
-  __CLK_VALID_IMAGE_TYPE_MASK = ( 1 << __CLK_VALID_IMAGE_TYPE_MASK_BITS ) - 1
-}clk_channel_type;
+  __CLK_VALID_IMAGE_TYPE_MASK_BITS = 4, // number of bits required to
+                                        // represent any image type
+  __CLK_VALID_IMAGE_TYPE_MASK = (1 << __CLK_VALID_IMAGE_TYPE_MASK_BITS) - 1
+} clk_channel_type;
 
 typedef enum clk_sampler_type {
-    __CLK_ADDRESS_BASE             = 0,
-    CLK_ADDRESS_NONE               = 0 << __CLK_ADDRESS_BASE,
-    CLK_ADDRESS_CLAMP              = 1 << __CLK_ADDRESS_BASE,
-    CLK_ADDRESS_CLAMP_TO_EDGE      = 2 << __CLK_ADDRESS_BASE,
-    CLK_ADDRESS_REPEAT             = 3 << __CLK_ADDRESS_BASE,
-    CLK_ADDRESS_MIRROR             = 4 << __CLK_ADDRESS_BASE,
+  __CLK_ADDRESS_BASE = 0,
+  CLK_ADDRESS_NONE = 0 << __CLK_ADDRESS_BASE,
+  CLK_ADDRESS_CLAMP = 1 << __CLK_ADDRESS_BASE,
+  CLK_ADDRESS_CLAMP_TO_EDGE = 2 << __CLK_ADDRESS_BASE,
+  CLK_ADDRESS_REPEAT = 3 << __CLK_ADDRESS_BASE,
+  CLK_ADDRESS_MIRROR = 4 << __CLK_ADDRESS_BASE,
 
 #if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1)
-    CLK_ADDRESS_MIRRORED_REPEAT    = CLK_ADDRESS_MIRROR,
+  CLK_ADDRESS_MIRRORED_REPEAT = CLK_ADDRESS_MIRROR,
 #endif
-    __CLK_ADDRESS_MASK             = CLK_ADDRESS_NONE | CLK_ADDRESS_CLAMP |
-                                     CLK_ADDRESS_CLAMP_TO_EDGE |
-                                     CLK_ADDRESS_REPEAT | CLK_ADDRESS_MIRROR,
-    __CLK_ADDRESS_BITS             = 3,        // number of bits required to
-                                               // represent address info
-
-    __CLK_NORMALIZED_BASE          = __CLK_ADDRESS_BITS,
-    CLK_NORMALIZED_COORDS_FALSE    = 0,
-    CLK_NORMALIZED_COORDS_TRUE     = 1 << __CLK_NORMALIZED_BASE,
-    __CLK_NORMALIZED_MASK          = CLK_NORMALIZED_COORDS_FALSE |
-                                     CLK_NORMALIZED_COORDS_TRUE,
-    __CLK_NORMALIZED_BITS          = 1,        // number of bits required to
-                                               // represent normalization
-
-    __CLK_FILTER_BASE              = __CLK_NORMALIZED_BASE +
-                                     __CLK_NORMALIZED_BITS,
-    CLK_FILTER_NEAREST             = 0 << __CLK_FILTER_BASE,
-    CLK_FILTER_LINEAR              = 1 << __CLK_FILTER_BASE,
-    CLK_FILTER_ANISOTROPIC         = 2 << __CLK_FILTER_BASE,
-    __CLK_FILTER_MASK              = CLK_FILTER_NEAREST | CLK_FILTER_LINEAR |
-                                     CLK_FILTER_ANISOTROPIC,
-    __CLK_FILTER_BITS              = 2,        // number of bits required to
-                                               // represent address info
-
-    __CLK_MIP_BASE                 = __CLK_FILTER_BASE + __CLK_FILTER_BITS,
-    CLK_MIP_NEAREST                = 0 << __CLK_MIP_BASE,
-    CLK_MIP_LINEAR                 = 1 << __CLK_MIP_BASE,
-    CLK_MIP_ANISOTROPIC            = 2 << __CLK_MIP_BASE,
-    __CLK_MIP_MASK                 = CLK_MIP_NEAREST | CLK_MIP_LINEAR |
-                                     CLK_MIP_ANISOTROPIC,
-    __CLK_MIP_BITS                 = 2,
-
-    __CLK_SAMPLER_BITS             = __CLK_MIP_BASE + __CLK_MIP_BITS,
-    __CLK_SAMPLER_MASK             = __CLK_MIP_MASK | __CLK_FILTER_MASK |
-                                     __CLK_NORMALIZED_MASK | __CLK_ADDRESS_MASK,
-
-    __CLK_ANISOTROPIC_RATIO_BITS   = 5,
-    __CLK_ANISOTROPIC_RATIO_MASK   = (int) 0x80000000 >>
-                                      (__CLK_ANISOTROPIC_RATIO_BITS-1)
+  __CLK_ADDRESS_MASK =
+      CLK_ADDRESS_NONE | CLK_ADDRESS_CLAMP | CLK_ADDRESS_CLAMP_TO_EDGE |
+      CLK_ADDRESS_REPEAT | CLK_ADDRESS_MIRROR,
+  __CLK_ADDRESS_BITS = 3, // number of bits required to
+                          // represent address info
+
+  __CLK_NORMALIZED_BASE = __CLK_ADDRESS_BITS,
+  CLK_NORMALIZED_COORDS_FALSE = 0,
+  CLK_NORMALIZED_COORDS_TRUE = 1 << __CLK_NORMALIZED_BASE,
+  __CLK_NORMALIZED_MASK =
+      CLK_NORMALIZED_COORDS_FALSE | CLK_NORMALIZED_COORDS_TRUE,
+  __CLK_NORMALIZED_BITS = 1, // number of bits required to
+                             // represent normalization
+
+  __CLK_FILTER_BASE = __CLK_NORMALIZED_BASE + __CLK_NORMALIZED_BITS,
+  CLK_FILTER_NEAREST = 0 << __CLK_FILTER_BASE,
+  CLK_FILTER_LINEAR = 1 << __CLK_FILTER_BASE,
+  CLK_FILTER_ANISOTROPIC = 2 << __CLK_FILTER_BASE,
+  __CLK_FILTER_MASK =
+      CLK_FILTER_NEAREST | CLK_FILTER_LINEAR | CLK_FILTER_ANISOTROPIC,
+  __CLK_FILTER_BITS = 2, // number of bits required to
+                         // represent address info
+
+  __CLK_MIP_BASE = __CLK_FILTER_BASE + __CLK_FILTER_BITS,
+  CLK_MIP_NEAREST = 0 << __CLK_MIP_BASE,
+  CLK_MIP_LINEAR = 1 << __CLK_MIP_BASE,
+  CLK_MIP_ANISOTROPIC = 2 << __CLK_MIP_BASE,
+  __CLK_MIP_MASK = CLK_MIP_NEAREST | CLK_MIP_LINEAR | CLK_MIP_ANISOTROPIC,
+  __CLK_MIP_BITS = 2,
+
+  __CLK_SAMPLER_BITS = __CLK_MIP_BASE + __CLK_MIP_BITS,
+  __CLK_SAMPLER_MASK = __CLK_MIP_MASK | __CLK_FILTER_MASK |
+                       __CLK_NORMALIZED_MASK | __CLK_ADDRESS_MASK,
+
+  __CLK_ANISOTROPIC_RATIO_BITS = 5,
+  __CLK_ANISOTROPIC_RATIO_MASK =
+      (int) 0x80000000 >> (__CLK_ANISOTROPIC_RATIO_BITS - 1)
 } clk_sampler_type;
 
 // Memory synchronization
-#define CLK_LOCAL_MEM_FENCE     (1 << 0)
-#define CLK_GLOBAL_MEM_FENCE    (1 << 1)
+#define CLK_LOCAL_MEM_FENCE (1 << 0)
+#define CLK_GLOBAL_MEM_FENCE (1 << 1)
 
 #endif // __CL_COMMON_DEFINES_H__