Diffstat:
 include/clang/Basic/TargetCXXABI.h          |  12
 include/clang/Basic/TargetInfo.h            |   4
 lib/AST/ASTContext.cpp                      |  83
 lib/Basic/Targets.cpp                       | 188
 lib/CodeGen/ABIInfo.h                       |   6
 lib/CodeGen/CodeGenModule.cpp               |   1
 lib/CodeGen/ItaniumCXXABI.cpp               |  14
 lib/CodeGen/TargetInfo.cpp                  | 418
 lib/Driver/ToolChains.cpp                   |  25
 lib/Driver/ToolChains.h                     |   3
 lib/Driver/Tools.cpp                        |   5
 test/CodeGen/aarch64-arguments.c            | 194
 test/CodeGen/aarch64-inline-asm.c           |  56
 test/CodeGen/aarch64-type-sizes.c           |  90
 test/CodeGen/aarch64-varargs.c              | 238
 test/CodeGenCXX/aarch64-arguments.cpp       |   5
 test/CodeGenCXX/aarch64-cxxabi.cpp          |  96
 test/Driver/aarch64-features.c              |   5
 test/Preprocessor/aarch64-target-features.c |  30
 19 files changed, 1467 insertions(+), 6 deletions(-)
diff --git a/include/clang/Basic/TargetCXXABI.h b/include/clang/Basic/TargetCXXABI.h
index 3c6677c78a..c9d28f8774 100644
--- a/include/clang/Basic/TargetCXXABI.h
+++ b/include/clang/Basic/TargetCXXABI.h
@@ -63,6 +63,14 @@ public:
/// - constructor/destructor signatures.
iOS,
+ /// The generic AArch64 ABI is also a modified version of the Itanium ABI,
+ /// but it has fewer divergences than the 32-bit ARM ABI.
+ ///
+ /// The relevant changes from the generic ABI in this case are:
+ /// - representation of member function pointers adjusted as in ARM.
+ /// - guard variables are smaller.
+ GenericAArch64,
+
/// The Microsoft ABI is the ABI used by Microsoft Visual Studio (and
/// compatible compilers).
///
@@ -93,6 +101,7 @@ public:
/// \brief Does this ABI generally fall into the Itanium family of ABIs?
bool isItaniumFamily() const {
switch (getKind()) {
+ case GenericAArch64:
case GenericItanium:
case GenericARM:
case iOS:
@@ -107,6 +116,7 @@ public:
/// \brief Is this ABI an MSVC-compatible ABI?
bool isMicrosoft() const {
switch (getKind()) {
+ case GenericAArch64:
case GenericItanium:
case GenericARM:
case iOS:
@@ -175,6 +185,7 @@ public:
case GenericARM:
return false;
+ case GenericAArch64:
case GenericItanium:
case iOS: // old iOS compilers did not follow this rule
case Microsoft:
@@ -220,6 +231,7 @@ public:
// permanently locked the definition of POD to the rules of C++ TR1,
// and that trickles down to all the derived ABIs.
case GenericItanium:
+ case GenericAArch64:
case GenericARM:
case iOS:
return UseTailPaddingUnlessPOD03;
diff --git a/include/clang/Basic/TargetInfo.h b/include/clang/Basic/TargetInfo.h
index 716d776df6..deaa3eeb77 100644
--- a/include/clang/Basic/TargetInfo.h
+++ b/include/clang/Basic/TargetInfo.h
@@ -136,6 +136,10 @@ public:
/// typedef void* __builtin_va_list;
VoidPtrBuiltinVaList,
+ /// __builtin_va_list as defined by the AArch64 ABI
+ /// http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055a/IHI0055A_aapcs64.pdf
+ AArch64ABIBuiltinVaList,
+
/// __builtin_va_list as defined by the PNaCl ABI:
/// http://www.chromium.org/nativeclient/pnacl/bitcode-abi#TOC-Machine-Types
PNaClABIBuiltinVaList,
diff --git a/lib/AST/ASTContext.cpp b/lib/AST/ASTContext.cpp
index 8091d685f7..5d15573038 100644
--- a/lib/AST/ASTContext.cpp
+++ b/lib/AST/ASTContext.cpp
@@ -597,6 +597,7 @@ CXXABI *ASTContext::createCXXABI(const TargetInfo &T) {
case TargetCXXABI::GenericARM:
case TargetCXXABI::iOS:
return CreateARMCXXABI(*this);
+ case TargetCXXABI::GenericAArch64: // Same as Itanium at this level
case TargetCXXABI::GenericItanium:
return CreateItaniumCXXABI(*this);
case TargetCXXABI::Microsoft:
@@ -5563,6 +5564,85 @@ static TypedefDecl *CreateVoidPtrBuiltinVaListDecl(const ASTContext *Context) {
return VaListTypeDecl;
}
+static TypedefDecl *
+CreateAArch64ABIBuiltinVaListDecl(const ASTContext *Context) {
+ RecordDecl *VaListTagDecl;
+ if (Context->getLangOpts().CPlusPlus) {
+ // namespace std { struct __va_list {
+ NamespaceDecl *NS;
+ NS = NamespaceDecl::Create(const_cast<ASTContext &>(*Context),
+ Context->getTranslationUnitDecl(),
+ /*Inline*/false, SourceLocation(),
+ SourceLocation(), &Context->Idents.get("std"),
+ /*PrevDecl*/0);
+
+ VaListTagDecl = CXXRecordDecl::Create(*Context, TTK_Struct,
+ Context->getTranslationUnitDecl(),
+ SourceLocation(), SourceLocation(),
+ &Context->Idents.get("__va_list"));
+ VaListTagDecl->setDeclContext(NS);
+ } else {
+ // struct __va_list
+ VaListTagDecl = CreateRecordDecl(*Context, TTK_Struct,
+ Context->getTranslationUnitDecl(),
+ &Context->Idents.get("__va_list"));
+ }
+
+ VaListTagDecl->startDefinition();
+
+ const size_t NumFields = 5;
+ QualType FieldTypes[NumFields];
+ const char *FieldNames[NumFields];
+
+ // void *__stack;
+ FieldTypes[0] = Context->getPointerType(Context->VoidTy);
+ FieldNames[0] = "__stack";
+
+ // void *__gr_top;
+ FieldTypes[1] = Context->getPointerType(Context->VoidTy);
+ FieldNames[1] = "__gr_top";
+
+ // void *__vr_top;
+ FieldTypes[2] = Context->getPointerType(Context->VoidTy);
+ FieldNames[2] = "__vr_top";
+
+ // int __gr_offs;
+ FieldTypes[3] = Context->IntTy;
+ FieldNames[3] = "__gr_offs";
+
+ // int __vr_offs;
+ FieldTypes[4] = Context->IntTy;
+ FieldNames[4] = "__vr_offs";
+
+ // Create fields
+ for (unsigned i = 0; i < NumFields; ++i) {
+ FieldDecl *Field = FieldDecl::Create(const_cast<ASTContext &>(*Context),
+ VaListTagDecl,
+ SourceLocation(),
+ SourceLocation(),
+ &Context->Idents.get(FieldNames[i]),
+ FieldTypes[i], /*TInfo=*/0,
+ /*BitWidth=*/0,
+ /*Mutable=*/false,
+ ICIS_NoInit);
+ Field->setAccess(AS_public);
+ VaListTagDecl->addDecl(Field);
+ }
+ VaListTagDecl->completeDefinition();
+ QualType VaListTagType = Context->getRecordType(VaListTagDecl);
+ Context->VaListTagTy = VaListTagType;
+
+ // } __builtin_va_list;
+ TypedefDecl *VaListTypedefDecl
+ = TypedefDecl::Create(const_cast<ASTContext &>(*Context),
+ Context->getTranslationUnitDecl(),
+ SourceLocation(), SourceLocation(),
+ &Context->Idents.get("__builtin_va_list"),
+ Context->getTrivialTypeSourceInfo(VaListTagType));
+
+ return VaListTypedefDecl;
+}
+
static TypedefDecl *CreatePowerABIBuiltinVaListDecl(const ASTContext *Context) {
// typedef struct __va_list_tag {
RecordDecl *VaListTagDecl;
@@ -5796,6 +5876,8 @@ static TypedefDecl *CreateVaListDecl(const ASTContext *Context,
return CreateCharPtrBuiltinVaListDecl(Context);
case TargetInfo::VoidPtrBuiltinVaList:
return CreateVoidPtrBuiltinVaListDecl(Context);
+ case TargetInfo::AArch64ABIBuiltinVaList:
+ return CreateAArch64ABIBuiltinVaListDecl(Context);
case TargetInfo::PowerABIBuiltinVaList:
return CreatePowerABIBuiltinVaListDecl(Context);
case TargetInfo::X86_64ABIBuiltinVaList:
@@ -7642,6 +7724,7 @@ bool ASTContext::isNearlyEmpty(const CXXRecordDecl *RD) const {
MangleContext *ASTContext::createMangleContext() {
switch (Target->getCXXABI().getKind()) {
+ case TargetCXXABI::GenericAArch64:
case TargetCXXABI::GenericItanium:
case TargetCXXABI::GenericARM:
case TargetCXXABI::iOS:
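A user-visible consequence of the struct-based __va_list built above (an illustrative sketch, not part of the patch): on AArch64 a va_list is a five-field aggregate rather than a plain pointer, so portable code duplicates traversal state with va_copy rather than by pointer assignment.

#include <stdarg.h>

// Sums the variadic arguments twice by duplicating the traversal state.
// va_copy copies the whole {__stack, __gr_top, __vr_top, __gr_offs, __vr_offs}
// record; assigning one va_list to another directly is not portable.
int sum_twice(int n, ...) {
  va_list ap, ap2;
  va_start(ap, n);
  va_copy(ap2, ap);
  int total = 0;
  for (int i = 0; i < n; ++i)
    total += va_arg(ap, int);
  for (int i = 0; i < n; ++i)
    total += va_arg(ap2, int);
  va_end(ap2);
  va_end(ap);
  return total;
}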
diff --git a/lib/Basic/Targets.cpp b/lib/Basic/Targets.cpp
index b90d49ec84..27984dfbf4 100644
--- a/lib/Basic/Targets.cpp
+++ b/lib/Basic/Targets.cpp
@@ -3020,6 +3020,186 @@ public:
Int64Type = SignedLongLong;
}
};
+}
+
+namespace {
+class AArch64TargetInfo : public TargetInfo {
+ static const char * const GCCRegNames[];
+ static const TargetInfo::GCCRegAlias GCCRegAliases[];
+public:
+ AArch64TargetInfo(const std::string& triple) : TargetInfo(triple) {
+ BigEndian = false;
+ LongWidth = LongAlign = 64;
+ LongDoubleWidth = LongDoubleAlign = 128;
+ PointerWidth = PointerAlign = 64;
+ SuitableAlign = 128;
+ DescriptionString = "e-p:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-"
+ "i64:64:64-i128:128:128-f32:32:32-f64:64:64-"
+ "f128:128:128-n32:64-S128";
+
+ WCharType = UnsignedInt;
+ LongDoubleFormat = &llvm::APFloat::IEEEquad;
+
+ TheCXXABI.set(TargetCXXABI::GenericAArch64);
+ }
+ virtual void getTargetDefines(const LangOptions &Opts,
+ MacroBuilder &Builder) const {
+ // GCC defines these currently
+ Builder.defineMacro("__aarch64__");
+ Builder.defineMacro("__AARCH64EL__");
+
+ // ACLE predefines. Many can only have one possible value on v8 AArch64.
+
+ // FIXME: these were written based on an unreleased version of a 32-bit ACLE
+ // which was intended to be compatible with a 64-bit implementation. They
+ // will need updating when a real 64-bit ACLE exists. Particularly pressing
+ // instances are: __AARCH_ISA_A32, __AARCH_ISA_T32, __ARCH_PCS.
+ Builder.defineMacro("__AARCH_ACLE", "101");
+ Builder.defineMacro("__AARCH", "8");
+ Builder.defineMacro("__AARCH_PROFILE", "'A'");
+
+ Builder.defineMacro("__AARCH_FEATURE_UNALIGNED");
+ Builder.defineMacro("__AARCH_FEATURE_CLZ");
+ Builder.defineMacro("__AARCH_FEATURE_FMA");
+
+ // FIXME: ACLE 1.1 reserves bit 4. Will almost certainly come to mean
+ // 128-bit LDXP present, at which point this becomes 0x1f.
+ Builder.defineMacro("__AARCH_FEATURE_LDREX", "0xf");
+
+ // 0xe implies support for half, single and double precision operations.
+ Builder.defineMacro("__AARCH_FP", "0xe");
+
+ // PCS specifies this for SysV variants, which is all we support. Other ABIs
+ // may choose __AARCH_FP16_FORMAT_ALTERNATIVE.
+ Builder.defineMacro("__AARCH_FP16_FORMAT_IEEE");
+
+ if (Opts.FastMath || Opts.FiniteMathOnly)
+ Builder.defineMacro("__AARCH_FP_FAST");
+
+ if ((Opts.C99 || Opts.C11) && !Opts.Freestanding)
+ Builder.defineMacro("__AARCH_FP_FENV_ROUNDING");
+
+ Builder.defineMacro("__AARCH_SIZEOF_WCHAR_T",
+ Opts.ShortWChar ? "2" : "4");
+
+ Builder.defineMacro("__AARCH_SIZEOF_MINIMAL_ENUM",
+ Opts.ShortEnums ? "1" : "4");
+
+ if (BigEndian)
+ Builder.defineMacro("__AARCH_BIG_ENDIAN");
+ }
+ virtual void getTargetBuiltins(const Builtin::Info *&Records,
+ unsigned &NumRecords) const {
+ Records = 0;
+ NumRecords = 0;
+ }
+ virtual bool hasFeature(StringRef Feature) const {
+ return Feature == "aarch64";
+ }
+ virtual void getGCCRegNames(const char * const *&Names,
+ unsigned &NumNames) const;
+ virtual void getGCCRegAliases(const GCCRegAlias *&Aliases,
+ unsigned &NumAliases) const;
+
+ virtual bool isCLZForZeroUndef() const { return false; }
+
+ virtual bool validateAsmConstraint(const char *&Name,
+ TargetInfo::ConstraintInfo &Info) const {
+ switch (*Name) {
+ default: return false;
+ case 'w': // An FP/SIMD vector register
+ Info.setAllowsRegister();
+ return true;
+ case 'I': // Constant that can be used with an ADD instruction
+ case 'J': // Constant that can be used with a SUB instruction
+ case 'K': // Constant that can be used with a 32-bit logical instruction
+ case 'L': // Constant that can be used with a 64-bit logical instruction
+ case 'M': // Constant that can be used as a 32-bit MOV immediate
+ case 'N': // Constant that can be used as a 64-bit MOV immediate
+ case 'Y': // Floating point constant zero
+ case 'Z': // Integer constant zero
+ return true;
+ case 'Q': // A memory reference with base register and no offset
+ Info.setAllowsMemory();
+ return true;
+ case 'S': // A symbolic address
+ Info.setAllowsRegister();
+ return true;
+ case 'U':
+ // Ump: A memory address suitable for ldp/stp in SI, DI, SF and DF modes, whatever they may be
+ // Utf: A memory address suitable for ldp/stp in TF mode, whatever it may be
+ // Usa: An absolute symbolic address
+ // Ush: The high part (bits 32:12) of a pc-relative symbolic address
+ llvm_unreachable("FIXME: Unimplemented support for bizarre constraints");
+ }
+ }
+
+ virtual const char *getClobbers() const {
+ // There are no AArch64 clobbers shared by all asm statements.
+ return "";
+ }
+
+ virtual BuiltinVaListKind getBuiltinVaListKind() const {
+ return TargetInfo::AArch64ABIBuiltinVaList;
+ }
+};
+
+const char * const AArch64TargetInfo::GCCRegNames[] = {
+ "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
+ "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
+ "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
+ "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wsp", "wzr",
+
+ "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
+ "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
+ "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
+ "x24", "x25", "x26", "x27", "x28", "x29", "x30", "sp", "xzr",
+
+ "b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7",
+ "b8", "b9", "b10", "b11", "b12", "b13", "b14", "b15",
+ "b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23",
+ "b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31",
+
+ "h0", "h1", "h2", "h3", "h4", "h5", "h6", "h7",
+ "h8", "h9", "h10", "h11", "h12", "h13", "h14", "h15",
+ "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23",
+ "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31",
+
+ "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
+ "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15",
+ "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
+ "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
+
+ "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
+ "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
+ "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
+ "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31",
+
+ "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
+ "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15",
+ "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23",
+ "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31"
+};
+
+void AArch64TargetInfo::getGCCRegNames(const char * const *&Names,
+ unsigned &NumNames) const {
+ Names = GCCRegNames;
+ NumNames = llvm::array_lengthof(GCCRegNames);
+}
+
+const TargetInfo::GCCRegAlias AArch64TargetInfo::GCCRegAliases[] = {
+ { { "x16" }, "ip0"},
+ { { "x17" }, "ip1"},
+ { { "x29" }, "fp" },
+ { { "x30" }, "lr" }
+};
+
+void AArch64TargetInfo::getGCCRegAliases(const GCCRegAlias *&Aliases,
+ unsigned &NumAliases) const {
+ Aliases = GCCRegAliases;
+ NumAliases = llvm::array_lengthof(GCCRegAliases);
+
+}
} // end anonymous namespace
namespace {
@@ -4540,6 +4720,14 @@ static TargetInfo *AllocateTarget(const std::string &T) {
case llvm::Triple::hexagon:
return new HexagonTargetInfo(T);
+ case llvm::Triple::aarch64:
+ switch (os) {
+ case llvm::Triple::Linux:
+ return new LinuxTargetInfo<AArch64TargetInfo>(T);
+ default:
+ return new AArch64TargetInfo(T);
+ }
+
case llvm::Triple::arm:
case llvm::Triple::thumb:
if (Triple.isOSDarwin())
diff --git a/lib/CodeGen/ABIInfo.h b/lib/CodeGen/ABIInfo.h
index 1da1689a65..10e2f60c86 100644
--- a/lib/CodeGen/ABIInfo.h
+++ b/lib/CodeGen/ABIInfo.h
@@ -102,8 +102,10 @@ namespace clang {
return ABIArgInfo(Ignore, 0, 0, false, false, false, false, 0);
}
static ABIArgInfo getIndirect(unsigned Alignment, bool ByVal = true
- , bool Realign = false) {
- return ABIArgInfo(Indirect, 0, Alignment, ByVal, Realign, false, false, 0);
+ , bool Realign = false
+ , llvm::Type *Padding = 0) {
+ return ABIArgInfo(Indirect, 0, Alignment, ByVal, Realign, false, false,
+ Padding);
}
static ABIArgInfo getIndirectInReg(unsigned Alignment, bool ByVal = true
, bool Realign = false) {
diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp
index 80b52978c7..91f22b1cd6 100644
--- a/lib/CodeGen/CodeGenModule.cpp
+++ b/lib/CodeGen/CodeGenModule.cpp
@@ -54,6 +54,7 @@ static const char AnnotationSection[] = "llvm.metadata";
static CGCXXABI &createCXXABI(CodeGenModule &CGM) {
switch (CGM.getContext().getTargetInfo().getCXXABI().getKind()) {
+ case TargetCXXABI::GenericAArch64:
case TargetCXXABI::GenericARM:
case TargetCXXABI::iOS:
case TargetCXXABI::GenericItanium:
diff --git a/lib/CodeGen/ItaniumCXXABI.cpp b/lib/CodeGen/ItaniumCXXABI.cpp
index fa25a2707d..a3d8cec344 100644
--- a/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/lib/CodeGen/ItaniumCXXABI.cpp
@@ -181,6 +181,12 @@ CodeGen::CGCXXABI *CodeGen::CreateItaniumCXXABI(CodeGenModule &CGM) {
case TargetCXXABI::iOS:
return new ARMCXXABI(CGM);
+ // Note that AArch64 uses the generic ItaniumCXXABI class since it doesn't
+ // include the other 32-bit ARM oddities: constructor/destructor return values
+ // and array cookies.
+ case TargetCXXABI::GenericAArch64:
+ return new ItaniumCXXABI(CGM, /*IsARM = */ true);
+
case TargetCXXABI::GenericItanium:
return new ItaniumCXXABI(CGM);
@@ -1015,8 +1021,9 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF,
if (useInt8GuardVariable) {
guardTy = CGF.Int8Ty;
} else {
- // Guard variables are 64 bits in the generic ABI and 32 bits on ARM.
- guardTy = (IsARM ? CGF.Int32Ty : CGF.Int64Ty);
+ // Guard variables are 64 bits in the generic ABI and the width of size_t
+ // on ARM (i.e. 32-bit on AArch32, 64-bit on AArch64).
+ guardTy = (IsARM ? CGF.SizeTy : CGF.Int64Ty);
}
llvm::PointerType *guardPtrTy = guardTy->getPointerTo();
@@ -1059,7 +1066,8 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF,
// }
if (IsARM && !useInt8GuardVariable) {
llvm::Value *V = Builder.CreateLoad(guard);
- V = Builder.CreateAnd(V, Builder.getInt32(1));
+ llvm::Value *Test1 = llvm::ConstantInt::get(guardTy, 1);
+ V = Builder.CreateAnd(V, Test1);
isInitialized = Builder.CreateIsNull(V, "guard.uninitialized");
// Itanium C++ ABI 3.3.2:
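The guarded-initialization change above is easiest to see from the source side (an illustrative sketch, not part of the patch): any function-local static with a dynamic initializer makes clang emit a guard variable, and that guard is the object whose width now follows size_t on ARM-family targets (32-bit on AArch32, 64-bit on AArch64).

int expensive_init();  // assumed to be defined elsewhere

int get_cached() {
  // Initialization is protected by __cxa_guard_acquire/__cxa_guard_release;
  // the inline "has it run yet?" test is the CreateAnd against 1 shown above,
  // now performed at the guard variable's full width.
  static int cache = expensive_init();
  return cache;
}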
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp
index a2e575d3f0..b870ae9eb1 100644
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -95,6 +95,7 @@ unsigned TargetCodeGenInfo::getSizeOfUnwindException() const {
// x86-32 FreeBSD, Linux, Darwin
// PowerPC Linux, Darwin
// ARM Darwin (*not* EABI)
+ // AArch64 Linux
return 32;
}
@@ -3591,6 +3592,420 @@ llvm::Value *NaClARMABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
}
//===----------------------------------------------------------------------===//
+// AArch64 ABI Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+class AArch64ABIInfo : public ABIInfo {
+public:
+ AArch64ABIInfo(CodeGenTypes &CGT) : ABIInfo(CGT) {}
+
+private:
+ // The AArch64 PCS is explicit about return types and argument types being
+ // handled identically, so we don't need to draw a distinction between
+ // Argument and Return classification.
+ ABIArgInfo classifyGenericType(QualType Ty, int &FreeIntRegs,
+ int &FreeVFPRegs) const;
+
+ ABIArgInfo tryUseRegs(QualType Ty, int &FreeRegs, int RegsNeeded, bool IsInt,
+ llvm::Type *DirectTy = 0) const;
+
+ virtual void computeInfo(CGFunctionInfo &FI) const;
+
+ virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
+ CodeGenFunction &CGF) const;
+};
+
+class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {
+public:
+ AArch64TargetCodeGenInfo(CodeGenTypes &CGT)
+ :TargetCodeGenInfo(new AArch64ABIInfo(CGT)) {}
+
+ const AArch64ABIInfo &getABIInfo() const {
+ return static_cast<const AArch64ABIInfo&>(TargetCodeGenInfo::getABIInfo());
+ }
+
+ int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const {
+ return 31;
+ }
+
+ bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
+ llvm::Value *Address) const {
+ // 0-31 are x0-x30 and sp: 8 bytes each
+ llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8);
+ AssignToArrayRange(CGF.Builder, Address, Eight8, 0, 31);
+
+ // 64-95 are v0-v31: 16 bytes each
+ llvm::Value *Sixteen8 = llvm::ConstantInt::get(CGF.Int8Ty, 16);
+ AssignToArrayRange(CGF.Builder, Address, Sixteen8, 64, 95);
+
+ return false;
+ }
+
+};
+
+}
+
+void AArch64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
+ int FreeIntRegs = 8, FreeVFPRegs = 8;
+
+ FI.getReturnInfo() = classifyGenericType(FI.getReturnType(),
+ FreeIntRegs, FreeVFPRegs);
+
+ FreeIntRegs = FreeVFPRegs = 8;
+ for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end();
+ it != ie; ++it) {
+ it->info = classifyGenericType(it->type, FreeIntRegs, FreeVFPRegs);
+
+ }
+}
+
+ABIArgInfo
+AArch64ABIInfo::tryUseRegs(QualType Ty, int &FreeRegs, int RegsNeeded,
+ bool IsInt, llvm::Type *DirectTy) const {
+ if (FreeRegs >= RegsNeeded) {
+ FreeRegs -= RegsNeeded;
+ return ABIArgInfo::getDirect(DirectTy);
+ }
+
+ llvm::Type *Padding = 0;
+
+ // We need padding so that later arguments don't get filled in anyway. That
+ // wouldn't happen if only ByVal arguments followed in the same category, but
+ // a large structure will simply seem to be a pointer as far as LLVM is
+ // concerned.
+ if (FreeRegs > 0) {
+ if (IsInt)
+ Padding = llvm::Type::getInt64Ty(getVMContext());
+ else
+ Padding = llvm::Type::getFloatTy(getVMContext());
+
+ // Either [N x i64] or [N x float].
+ Padding = llvm::ArrayType::get(Padding, FreeRegs);
+ FreeRegs = 0;
+ }
+
+ return ABIArgInfo::getIndirect(getContext().getTypeAlign(Ty) / 8,
+ /*IsByVal=*/ true, /*Realign=*/ false,
+ Padding);
+}
+
+
+ABIArgInfo AArch64ABIInfo::classifyGenericType(QualType Ty,
+ int &FreeIntRegs,
+ int &FreeVFPRegs) const {
+ // Can only occur for return types, but harmless otherwise.
+ if (Ty->isVoidType())
+ return ABIArgInfo::getIgnore();
+
+ // Large vector types should be returned via memory. There's no such concept
+ // in the ABI, but they'd be over 16 bytes anyway so no matter how they're
+ // classified they'd go into memory (see B.3).
+ if (Ty->isVectorType() && getContext().getTypeSize(Ty) > 128) {
+ if (FreeIntRegs > 0)
+ --FreeIntRegs;
+ return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
+ }
+
+ // All non-aggregate LLVM types have a concrete ABI representation so they can
+ // be passed directly. After this block we're guaranteed to be in a
+ // complicated case.
+ if (!isAggregateTypeForABI(Ty)) {
+ // Treat an enum type as its underlying type.
+ if (const EnumType *EnumTy = Ty->getAs<EnumType>())
+ Ty = EnumTy->getDecl()->getIntegerType();
+
+ if (Ty->isFloatingType() || Ty->isVectorType())
+ return tryUseRegs(Ty, FreeVFPRegs, /*RegsNeeded=*/ 1, /*IsInt=*/ false);
+
+ assert(getContext().getTypeSize(Ty) <= 128 &&
+ "unexpectedly large scalar type");
+
+ int RegsNeeded = getContext().getTypeSize(Ty) > 64 ? 2 : 1;
+
+ // If the type may need padding registers to ensure "alignment", we must be
+ // careful when this is accounted for. Increasing the effective size covers
+ // all cases.
+ if (getContext().getTypeAlign(Ty) == 128)
+ RegsNeeded += FreeIntRegs % 2 != 0;
+
+ return tryUseRegs(Ty, FreeIntRegs, RegsNeeded, /*IsInt=*/ true);
+ }
+
+ // Structures with either a non-trivial destructor or a non-trivial
+ // copy constructor are always indirect.
+ if (isRecordWithNonTrivialDestructorOrCopyConstructor(Ty)) {
+ if (FreeIntRegs > 0)
+ --FreeIntRegs;
+ return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
+ }
+
+ if (isEmptyRecord(getContext(), Ty, true)) {
+ if (!getContext().getLangOpts().CPlusPlus) {
+ // Empty structs outside C++ mode are a GNU extension, so no ABI can
+ // possibly tell us what to do. It turns out (I believe) that GCC ignores
+ // the object for parameter-passing purposes.
+ return ABIArgInfo::getIgnore();
+ }
+
+ // The combination of C++98 9p5 (sizeof(struct) != 0) and the pseudocode
+ // description of va_arg in the PCS require that an empty struct does
+ // actually occupy space for parameter-passing. I'm hoping for a
+ // clarification giving an explicit paragraph to point to in future.
+ return tryUseRegs(Ty, FreeIntRegs, /*RegsNeeded=*/ 1, /*IsInt=*/ true,
+ llvm::Type::getInt8Ty(getVMContext()));
+ }
+
+ // Homogeneous vector aggregates get passed in registers or on the stack.
+ const Type *Base = 0;
+ uint64_t NumMembers = 0;
+ if (isHomogeneousAggregate(Ty, Base, getContext(), &NumMembers)) {
+ assert(Base && "Base class should be set for homogeneous aggregate");
+ // Homogeneous aggregates are passed and returned directly.
+ return tryUseRegs(Ty, FreeVFPRegs, /*RegsNeeded=*/ NumMembers,
+ /*IsInt=*/ false);
+ }
+
+ uint64_t Size = getContext().getTypeSize(Ty);
+ if (Size <= 128) {
+ // Small structs can use the same direct type whether they're in registers
+ // or on the stack.
+ llvm::Type *BaseTy;
+ unsigned NumBases;
+ int SizeInRegs = (Size + 63) / 64;
+
+ if (getContext().getTypeAlign(Ty) == 128) {
+ BaseTy = llvm::Type::getIntNTy(getVMContext(), 128);
+ NumBases = 1;
+
+ // If the type may need padding registers to ensure "alignment", we must
+ // be careful when this is accounted for. Increasing the effective size
+ // covers all cases.
+ SizeInRegs += FreeIntRegs % 2 != 0;
+ } else {
+ BaseTy = llvm::Type::getInt64Ty(getVMContext());
+ NumBases = SizeInRegs;
+ }
+ llvm::Type *DirectTy = llvm::ArrayType::get(BaseTy, NumBases);
+
+ return tryUseRegs(Ty, FreeIntRegs, /*RegsNeeded=*/ SizeInRegs,
+ /*IsInt=*/ true, DirectTy);
+ }
+
+ // If the aggregate is > 16 bytes, it's passed and returned indirectly. In
+ // LLVM terms the return uses an "sret" pointer, but that's handled elsewhere.
+ --FreeIntRegs;
+ return ABIArgInfo::getIndirect(0, /* byVal = */ false);
+}
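// Editorial sketch, not part of the patch: example declarations and the
// classifications classifyGenericType above would give them, assuming all
// eight integer and eight VFP registers are still free (the
// test/CodeGen/aarch64-arguments.c file added later checks these in detail).
struct HFA   { double d[3]; };  // homogeneous FP aggregate: direct [3 x double]
struct Small { long a, b; };    // 16 bytes: direct [2 x i64] in integer regs
struct Big   { int arr[5]; };   // 20 bytes > 16: passed indirectly via pointer
void example_callee(struct HFA, struct Small, struct Big, __int128 wide);
// 'wide' is a >64-bit scalar: passed direct in an even-aligned pair of
// integer registers (its 128-bit alignment may also consume a padding reg).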
+
+llvm::Value *AArch64ABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
+ CodeGenFunction &CGF) const {
+ // The AArch64 va_list type and handling is specified in the Procedure Call
+ // Standard, section B.4:
+ //
+ // struct {
+ // void *__stack;
+ // void *__gr_top;
+ // void *__vr_top;
+ // int __gr_offs;
+ // int __vr_offs;
+ // };
+
+ assert(!CGF.CGM.getDataLayout().isBigEndian()
+ && "va_arg not implemented for big-endian AArch64");
+
+ int FreeIntRegs = 8, FreeVFPRegs = 8;
+ Ty = CGF.getContext().getCanonicalType(Ty);
+ ABIArgInfo AI = classifyGenericType(Ty, FreeIntRegs, FreeVFPRegs);
+
+ llvm::BasicBlock *MaybeRegBlock = CGF.createBasicBlock("vaarg.maybe_reg");
+ llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg");
+ llvm::BasicBlock *OnStackBlock = CGF.createBasicBlock("vaarg.on_stack");
+ llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end");
+
+ llvm::Value *reg_offs_p = 0, *reg_offs = 0;
+ int reg_top_index;
+ int RegSize;
+ if (FreeIntRegs < 8) {
+ assert(FreeVFPRegs == 8 && "Arguments never split between int & VFP regs");
+ // 3 is the field number of __gr_offs
+ reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 3, "gr_offs_p");
+ reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "gr_offs");
+ reg_top_index = 1; // field number for __gr_top
+ RegSize = 8 * (8 - FreeIntRegs);
+ } else {
+ assert(FreeVFPRegs < 8 && "Argument must go in VFP or int regs");
+ // 4 is the field number of __vr_offs.
+ reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 4, "vr_offs_p");
+ reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "vr_offs");
+ reg_top_index = 2; // field number for __vr_top
+ RegSize = 16 * (8 - FreeVFPRegs);
+ }
+
+ //=======================================
+ // Find out where argument was passed
+ //=======================================
+
+ // If reg_offs >= 0 we're already using the stack for this type of
+ // argument. We don't want to keep updating reg_offs (in case it overflows,
+ // though anyone passing 2GB of arguments, each at most 16 bytes, deserves
+ // whatever they get).
+ llvm::Value *UsingStack = 0;
+ UsingStack = CGF.Builder.CreateICmpSGE(reg_offs,
+ llvm::ConstantInt::get(CGF.Int32Ty, 0));
+
+ CGF.Builder.CreateCondBr(UsingStack, OnStackBlock, MaybeRegBlock);
+
+ // Otherwise, at least some kind of argument could go in these registers, the
+ // question is whether this particular type is too big.
+ CGF.EmitBlock(MaybeRegBlock);
+
+ // Integer arguments may need their register alignment corrected (for example a
+ // "struct { __int128 a; };" gets passed in x_2N, x_{2N+1}). In this case we
+ // align __gr_offs to calculate the potential address.
+ if (FreeIntRegs < 8 && AI.isDirect() && getContext().getTypeAlign(Ty) > 64) {
+ int Align = getContext().getTypeAlign(Ty) / 8;
+
+ reg_offs = CGF.Builder.CreateAdd(reg_offs,
+ llvm::ConstantInt::get(CGF.Int32Ty, Align - 1),
+ "align_regoffs");
+ reg_offs = CGF.Builder.CreateAnd(reg_offs,
+ llvm::ConstantInt::get(CGF.Int32Ty, -Align),
+ "aligned_regoffs");
+ }
+
+ // Update the gr_offs/vr_offs pointer for next call to va_arg on this va_list.
+ llvm::Value *NewOffset = 0;
+ NewOffset = CGF.Builder.CreateAdd(reg_offs,
+ llvm::ConstantInt::get(CGF.Int32Ty, RegSize),
+ "new_reg_offs");
+ CGF.Builder.CreateStore(NewOffset, reg_offs_p);
+
+ // Now we're in a position to decide whether this argument really was in
+ // registers or not.
+ llvm::Value *InRegs = 0;
+ InRegs = CGF.Builder.CreateICmpSLE(NewOffset,
+ llvm::ConstantInt::get(CGF.Int32Ty, 0),
+ "inreg");
+
+ CGF.Builder.CreateCondBr(InRegs, InRegBlock, OnStackBlock);
+
+ //=======================================
+ // Argument was in registers
+ //=======================================
+
+ // Now we emit the code for if the argument was originally passed in
+ // registers. First start the appropriate block:
+ CGF.EmitBlock(InRegBlock);
+
+ llvm::Value *reg_top_p = 0, *reg_top = 0;
+ reg_top_p = CGF.Builder.CreateStructGEP(VAListAddr, reg_top_index, "reg_top_p");
+ reg_top = CGF.Builder.CreateLoad(reg_top_p, "reg_top");
+ llvm::Value *BaseAddr = CGF.Builder.CreateGEP(reg_top, reg_offs);
+ llvm::Value *RegAddr = 0;
+ llvm::Type *MemTy = llvm::PointerType::getUnqual(CGF.ConvertTypeForMem(Ty));
+
+ if (!AI.isDirect()) {
+ // If it's been passed indirectly (actually a struct), whatever we find from
+ // stored registers or on the stack will actually be a struct **.
+ MemTy = llvm::PointerType::getUnqual(MemTy);
+ }
+
+ const Type *Base = 0;
+ uint64_t NumMembers;
+ if (isHomogeneousAggregate(Ty, Base, getContext(), &NumMembers)
+ && NumMembers > 1) {
+ // Homogeneous aggregates passed in registers will have their elements split
+ // and stored 16-bytes apart regardless of size (they're notionally in qN,
+ // qN+1, ...). We reload and store into a temporary local variable
+ // contiguously.
+ assert(AI.isDirect() && "Homogeneous aggregates should be passed directly");
+ llvm::Type *BaseTy = CGF.ConvertType(QualType(Base, 0));
+ llvm::Type *HFATy = llvm::ArrayType::get(BaseTy, NumMembers);
+ llvm::Value *Tmp = CGF.CreateTempAlloca(HFATy);
+
+ for (unsigned i = 0; i < NumMembers; ++i) {
+ llvm::Value *BaseOffset = llvm::ConstantInt::get(CGF.Int32Ty, 16 * i);
+ llvm::Value *LoadAddr = CGF.Builder.CreateGEP(BaseAddr, BaseOffset);
+ LoadAddr = CGF.Builder.CreateBitCast(LoadAddr,
+ llvm::PointerType::getUnqual(BaseTy));
+ llvm::Value *StoreAddr = CGF.Builder.CreateStructGEP(Tmp, i);
+
+ llvm::Value *Elem = CGF.Builder.CreateLoad(LoadAddr);
+ CGF.Builder.CreateStore(Elem, StoreAddr);
+ }
+
+ RegAddr = CGF.Builder.CreateBitCast(Tmp, MemTy);
+ } else {
+ // Otherwise the object is contiguous in memory
+ RegAddr = CGF.Builder.CreateBitCast(BaseAddr, MemTy);
+ }
+
+ CGF.EmitBranch(ContBlock);
+
+ //=======================================
+ // Argument was on the stack
+ //=======================================
+ CGF.EmitBlock(OnStackBlock);
+
+ llvm::Value *stack_p = 0, *OnStackAddr = 0;
+ stack_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, "stack_p");
+ OnStackAddr = CGF.Builder.CreateLoad(stack_p, "stack");
+
+ // Again, stack arguments may need realignment. In this case both integer and
+ // floating-point ones might be affected.
+ if (AI.isDirect() && getContext().getTypeAlign(Ty) > 64) {
+ int Align = getContext().getTypeAlign(Ty) / 8;
+
+ OnStackAddr = CGF.Builder.CreatePtrToInt(OnStackAddr, CGF.Int64Ty);
+
+ OnStackAddr = CGF.Builder.CreateAdd(OnStackAddr,
+ llvm::ConstantInt::get(CGF.Int64Ty, Align - 1),
+ "align_stack");
+ OnStackAddr = CGF.Builder.CreateAnd(OnStackAddr,
+ llvm::ConstantInt::get(CGF.Int64Ty, -Align),
+ "align_stack");
+
+ OnStackAddr = CGF.Builder.CreateIntToPtr(OnStackAddr, CGF.Int8PtrTy);
+ }
+
+ uint64_t StackSize;
+ if (AI.isDirect())
+ StackSize = getContext().getTypeSize(Ty) / 8;
+ else
+ StackSize = 8;
+
+ // All stack slots are 8 bytes
+ StackSize = llvm::RoundUpToAlignment(StackSize, 8);
+
+ llvm::Value *StackSizeC = llvm::ConstantInt::get(CGF.Int32Ty, StackSize);
+ llvm::Value *NewStack = CGF.Builder.CreateGEP(OnStackAddr, StackSizeC,
+ "new_stack");
+
+ // Write the new value of __stack for the next call to va_arg
+ CGF.Builder.CreateStore(NewStack, stack_p);
+
+ OnStackAddr = CGF.Builder.CreateBitCast(OnStackAddr, MemTy);
+
+ CGF.EmitBranch(ContBlock);
+
+ //=======================================
+ // Tidy up
+ //=======================================
+ CGF.EmitBlock(ContBlock);
+
+ llvm::PHINode *ResAddr = CGF.Builder.CreatePHI(MemTy, 2, "vaarg.addr");
+ ResAddr->addIncoming(RegAddr, InRegBlock);
+ ResAddr->addIncoming(OnStackAddr, OnStackBlock);
+
+ if (AI.isDirect())
+ return ResAddr;
+
+ return CGF.Builder.CreateLoad(ResAddr, "vaarg.addr");
+}
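// Editorial sketch, not part of the patch: the general-register path of the
// IR emitted by EmitVAArg above corresponds roughly to the C++ below; the
// 16-byte alignment fixup and the FP/SIMD (__vr_*) path are omitted.
struct VAListSketch { void *stack, *gr_top, *vr_top; int gr_offs, vr_offs; };
static void *next_gp_arg(VAListSketch &ap, int reg_size /* 8 or 16 bytes */) {
  int offs = ap.gr_offs;
  if (offs < 0) {                        // maybe_reg: save area not exhausted
    ap.gr_offs = offs + reg_size;        // reserve the register slot(s)
    if (ap.gr_offs <= 0)                 // in_reg: negative offset from gr_top
      return static_cast<char *>(ap.gr_top) + offs;
  }
  // on_stack: stacked arguments always occupy a multiple of 8 bytes.
  void *addr = ap.stack;
  ap.stack = static_cast<char *>(addr) + ((reg_size + 7) & ~7);
  return addr;
}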
+
+//===----------------------------------------------------------------------===//
// NVPTX ABI Implementation
//===----------------------------------------------------------------------===//
@@ -4397,6 +4812,9 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
case llvm::Triple::mips64el:
return *(TheTargetCodeGenInfo = new MIPSTargetCodeGenInfo(Types, false));
+ case llvm::Triple::aarch64:
+ return *(TheTargetCodeGenInfo = new AArch64TargetCodeGenInfo(Types));
+
case llvm::Triple::arm:
case llvm::Triple::thumb:
{
diff --git a/lib/Driver/ToolChains.cpp b/lib/Driver/ToolChains.cpp
index 6f68f1c577..39f0f06de8 100644
--- a/lib/Driver/ToolChains.cpp
+++ b/lib/Driver/ToolChains.cpp
@@ -1081,6 +1081,12 @@ Generic_GCC::GCCInstallationDetector::GCCInstallationDetector(
// Declare a bunch of static data sets that we'll select between below. These
// are specifically designed to always refer to string literals to avoid any
// lifetime or initialization issues.
+ static const char *const AArch64LibDirs[] = { "/lib" };
+ static const char *const AArch64Triples[] = {
+ "aarch64-none-linux-gnu",
+ "aarch64-linux-gnu"
+ };
+
static const char *const ARMLibDirs[] = { "/lib" };
static const char *const ARMTriples[] = {
"arm-linux-gnueabi",
@@ -1146,6 +1152,16 @@ Generic_GCC::GCCInstallationDetector::GCCInstallationDetector(
};
switch (TargetTriple.getArch()) {
+ case llvm::Triple::aarch64:
+ LibDirs.append(AArch64LibDirs, AArch64LibDirs
+ + llvm::array_lengthof(AArch64LibDirs));
+ TripleAliases.append(
+ AArch64Triples, AArch64Triples + llvm::array_lengthof(AArch64Triples));
+ MultiarchLibDirs.append(
+ AArch64LibDirs, AArch64LibDirs + llvm::array_lengthof(AArch64LibDirs));
+ MultiarchTripleAliases.append(
+ AArch64Triples, AArch64Triples + llvm::array_lengthof(AArch64Triples));
+ break;
case llvm::Triple::arm:
case llvm::Triple::thumb:
LibDirs.append(ARMLibDirs, ARMLibDirs + llvm::array_lengthof(ARMLibDirs));
@@ -2218,6 +2234,9 @@ static std::string getMultiarchTriple(const llvm::Triple TargetTriple,
if (llvm::sys::fs::exists(SysRoot + "/lib/x86_64-linux-gnu"))
return "x86_64-linux-gnu";
return TargetTriple.str();
+ case llvm::Triple::aarch64:
+ if (llvm::sys::fs::exists(SysRoot + "/lib/aarch64-linux-gnu"))
+ return "aarch64-linux-gnu";
case llvm::Triple::mips:
if (llvm::sys::fs::exists(SysRoot + "/lib/mips-linux-gnu"))
return "mips-linux-gnu";
@@ -2444,6 +2463,7 @@ void Linux::addClangTargetOptions(const ArgList &DriverArgs,
const Generic_GCC::GCCVersion &V = GCCInstallation.getVersion();
bool UseInitArrayDefault
= V >= Generic_GCC::GCCVersion::Parse("4.7.0") ||
+ getTriple().getArch() == llvm::Triple::aarch64 ||
getTriple().getEnvironment() == llvm::Triple::Android;
if (DriverArgs.hasFlag(options::OPT_fuse_init_array,
options::OPT_fno_use_init_array,
@@ -2506,6 +2526,9 @@ void Linux::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
"/usr/include/i686-linux-gnu",
"/usr/include/i486-linux-gnu"
};
+ const StringRef AArch64MultiarchIncludeDirs[] = {
+ "/usr/include/aarch64-linux-gnu"
+ };
const StringRef ARMMultiarchIncludeDirs[] = {
"/usr/include/arm-linux-gnueabi"
};
@@ -2529,6 +2552,8 @@ void Linux::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
MultiarchIncludeDirs = X86_64MultiarchIncludeDirs;
} else if (getTriple().getArch() == llvm::Triple::x86) {
MultiarchIncludeDirs = X86MultiarchIncludeDirs;
+ } else if (getTriple().getArch() == llvm::Triple::aarch64) {
+ MultiarchIncludeDirs = AArch64MultiarchIncludeDirs;
} else if (getTriple().getArch() == llvm::Triple::arm) {
if (getTriple().getEnvironment() == llvm::Triple::GNUEABIHF)
MultiarchIncludeDirs = ARMHFMultiarchIncludeDirs;
diff --git a/lib/Driver/ToolChains.h b/lib/Driver/ToolChains.h
index c0a9646c28..b313ab610c 100644
--- a/lib/Driver/ToolChains.h
+++ b/lib/Driver/ToolChains.h
@@ -388,7 +388,8 @@ public:
virtual bool IsIntegratedAssemblerDefault() const {
// Default integrated assembler to on for x86.
- return (getTriple().getArch() == llvm::Triple::x86 ||
+ return (getTriple().getArch() == llvm::Triple::aarch64 ||
+ getTriple().getArch() == llvm::Triple::x86 ||
getTriple().getArch() == llvm::Triple::x86_64);
}
};
diff --git a/lib/Driver/Tools.cpp b/lib/Driver/Tools.cpp
index f925f762e5..cb7cc2381d 100644
--- a/lib/Driver/Tools.cpp
+++ b/lib/Driver/Tools.cpp
@@ -549,6 +549,7 @@ static bool isSignedCharDefault(const llvm::Triple &Triple) {
default:
return true;
+ case llvm::Triple::aarch64:
case llvm::Triple::arm:
case llvm::Triple::ppc:
case llvm::Triple::ppc64:
@@ -5578,6 +5579,8 @@ void linuxtools::Link::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back("-m");
if (ToolChain.getArch() == llvm::Triple::x86)
CmdArgs.push_back("elf_i386");
+ else if (ToolChain.getArch() == llvm::Triple::aarch64)
+ CmdArgs.push_back("aarch64linux");
else if (ToolChain.getArch() == llvm::Triple::arm
|| ToolChain.getArch() == llvm::Triple::thumb)
CmdArgs.push_back("armelf_linux_eabi");
@@ -5626,6 +5629,8 @@ void linuxtools::Link::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back("/system/bin/linker");
else if (ToolChain.getArch() == llvm::Triple::x86)
CmdArgs.push_back("/lib/ld-linux.so.2");
+ else if (ToolChain.getArch() == llvm::Triple::aarch64)
+ CmdArgs.push_back("/lib/ld-linux-aarch64.so.1");
else if (ToolChain.getArch() == llvm::Triple::arm ||
ToolChain.getArch() == llvm::Triple::thumb) {
if (ToolChain.getTriple().getEnvironment() == llvm::Triple::GNUEABIHF)
diff --git a/test/CodeGen/aarch64-arguments.c b/test/CodeGen/aarch64-arguments.c
new file mode 100644
index 0000000000..2255b8244d
--- /dev/null
+++ b/test/CodeGen/aarch64-arguments.c
@@ -0,0 +1,194 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -w -o - %s | FileCheck -check-prefix=PCS %s
+
+// Sign extension is performed by the callee on AArch64, which means
+// that we *shouldn't* tag arguments and returns with their extension.
+
+// PCS: define i8 @f0(i16 %a)
+char f0(short a) {
+ return a;
+}
+
+// PCS: define [1 x i64] @f1()
+struct s1 { char f0; };
+struct s1 f1(void) {}
+
+// PCS: define [1 x i64] @f2()
+struct s2 { short f0; };
+struct s2 f2(void) {}
+
+// PCS: define [1 x i64] @f3()
+struct s3 { int f0; };
+struct s3 f3(void) {}
+
+// PCS: define [1 x i64] @f4()
+struct s4 { struct s4_0 { int f0; } f0; };
+struct s4 f4(void) {}
+
+// PCS: define [1 x i64] @f5()
+struct s5 { struct { } f0; int f1; };
+struct s5 f5(void) {}
+
+// PCS: define [1 x i64] @f6()
+struct s6 { int f0[1]; };
+struct s6 f6(void) {}
+
+// PCS: define void @f7()
+struct s7 { struct { int : 0; } f0; };
+struct s7 f7(void) {}
+
+// PCS: define void @f8()
+struct s8 { struct { int : 0; } f0[1]; };
+struct s8 f8(void) {}
+
+// PCS: define [1 x i64] @f9()
+struct s9 { long f0; int : 0; };
+struct s9 f9(void) {}
+
+// PCS: define [1 x i64] @f10()
+struct s10 { long f0; int : 0; int : 0; };
+struct s10 f10(void) {}
+
+// PCS: define [1 x i64] @f11()
+struct s11 { int : 0; long f0; };
+struct s11 f11(void) {}
+
+// PCS: define [1 x i64] @f12()
+union u12 { char f0; short f1; int f2; long f3; };
+union u12 f12(void) {}
+
+// PCS: define %struct.s13 @f13()
+struct s13 { float f0; };
+struct s13 f13(void) {}
+
+// PCS: define %union.u14 @f14()
+union u14 { float f0; };
+union u14 f14(void) {}
+
+// PCS: define void @f15()
+void f15(struct s7 a0) {}
+
+// PCS: define void @f16()
+void f16(struct s8 a0) {}
+
+// PCS: define [1 x i64] @f17()
+struct s17 { short f0 : 13; char f1 : 4; };
+struct s17 f17(void) {}
+
+// PCS: define [1 x i64] @f18()
+struct s18 { short f0; char f1 : 4; };
+struct s18 f18(void) {}
+
+// PCS: define [1 x i64] @f19()
+struct s19 { long f0; struct s8 f1; };
+struct s19 f19(void) {}
+
+// PCS: define [1 x i64] @f20()
+struct s20 { struct s8 f1; long f0; };
+struct s20 f20(void) {}
+
+// PCS: define [1 x i64] @f21()
+struct s21 { struct {} f1; long f0 : 4; };
+struct s21 f21(void) {}
+
+// PCS: define { float, float } @f22()
+// PCS: define { double, double } @f23(
+_Complex float f22(void) {}
+_Complex double f23(void) {}
+
+// PCS: define [1 x i64] @f24()
+struct s24 { _Complex char f0; };
+struct s24 f24() {}
+
+// PCS: define [1 x i64] @f25()
+struct s25 { _Complex short f0; };
+struct s25 f25() {}
+
+// PCS: define [1 x i64] @f26()
+struct s26 { _Complex int f0; };
+struct s26 f26() {}
+
+// PCS: define [2 x i64] @f27()
+struct s27 { _Complex long f0; };
+struct s27 f27() {}
+
+// PCS: define void @f28(i8 %a, i16 %b, i32 %c, i64 %d, float %e, double %f)
+void f28(char a, short b, int c, long d, float e, double f) {}
+
+// PCS: define void @f29([2 x i64] %a
+struct s29 { int arr[4]; };
+void f29(struct s29 a) {}
+
+// PCS: define void @f30(%struct.s30* %a)
+struct s30 { int arr[4]; char c;};
+void f30(struct s30 a) {}
+
+// PCS: define void @f31([4 x double] %a
+struct s31 { double arr[4]; };
+void f31(struct s31 a) {}
+
+// PCS: define void @f32(%struct.s32* %a)
+struct s32 { float arr[5]; };
+void f32(struct s32 a) {}
+
+// Not the only solution, but it *is* an HFA.
+// PCS: define void @f33([3 x float] %a.coerce0, float %a.coerce1)
+struct s33 { float arr[3]; float a; };
+void f33(struct s33 a) {}
+
+// PCS: define void @f34(%struct.s34* sret noalias
+struct s34 { int a[4]; char b; };
+struct s34 f34(void) {}
+
+// PCS: define void @f35()
+struct s35 {};
+void f35(struct s35 a) {}
+
+// Check padding is added:
+// PCS: @f36(i32 %x0, i32 %x1, i32 %x2, i32 %x3, i32 %x4, i32 %x5, i32 %x6, [1 x i64], %struct.s36* byval align 8 %stacked)
+struct s36 { long a, b; };
+void f36(int x0, int x1, int x2, int x3, int x4, int x5, int x6, struct s36 stacked) {}
+
+// But only once:
+// PCS: @f37(i32 %x0, i32 %x1, i32 %x2, i32 %x3, i32 %x4, i32 %x5, i32 %x6, [1 x i64], %struct.s37* byval align 8 %stacked, %struct.s37* byval align 8 %stacked2)
+struct s37 { long a, b; };
+void f37(int x0, int x1, int x2, int x3, int x4, int x5, int x6, struct s37 stacked, struct s37 stacked2) {}
+
+// Check for HFA padding args. Also, they should not end up on the stack in a
+// way which will have holes in when lowered further by LLVM. In particular [3 x
+// float] would be unacceptable.
+
+// PCS: @f38(float %s0, double %d1, float %s2, float %s3, float %s4, float %s5, [2 x float], %struct.s38* byval align 4 %stacked)
+struct s38 { float a, b, c; };
+void f38(float s0, double d1, float s2, float s3, float s4, float s5, struct s38 stacked) {}
+
+// Check both VFP and integer arguments are padded (also that pointers and enums
+// get counted as integer types correctly).
+struct s39_int { long a, b; };
+struct s39_float { float a, b, c, d; };
+enum s39_enum { Val1, Val2 };
+// PCS: @f39(float %s0, i32 %x0, float %s1, i32* %x1, float %s2, i32 %x2, float %s3, float %s4, i32 %x3, [3 x float], %struct.s39_float* byval align 4 %stacked, i32 %x4, i32 %x5, i32 %x6, [1 x i64], %struct.s39_int* byval align 8 %stacked2)
+void f39(float s0, int x0, float s1, int *x1, float s2, enum s39_enum x2, float s3, float s4,
+ int x3, struct s39_float stacked, int x4, int x5, int x6,
+ struct s39_int stacked2) {}
+
+struct s40 { __int128 a; };
+// PCS: @f40(i32 %x0, [1 x i128] %x2_3.coerce, i32 %x4, i32 %x5, i32 %x6, [1 x i64], %struct.s40* byval align 16 %stacked)
+void f40(int x0, struct s40 x2_3, int x4, int x5, int x6, struct s40 stacked) {}
+
+// Checking: __int128 will get properly aligned type, with padding so big struct doesn't use x7.
+struct s41 { int arr[5]; };
+// PCS: @f41(i32 %x0, i32 %x1, i32 %x2, i32 %x3, i32 %x4, i32 %x5, i32 %x6, [1 x i64], i128* byval align 16, %struct.s41* %stacked2)
+int f41(int x0, int x1, int x2, int x3, int x4, int x5, int x6, __int128 stacked, struct s41 stacked2) {}
+
+// Checking: __int128 needing to be aligned in registers will consume correct
+// number. Previously padding was inserted before "stacked" because x6_7 was
+// "allocated" to x5 and x6 by clang.
+// PCS: @f42(i32 %x0, i32 %x1, i32 %x2, i32 %x3, i32 %x4, i128 %x6_7, i128* byval align 16)
+void f42(int x0, int x1, int x2, int x3, int x4, __int128 x6_7, __int128 stacked) {}
+
+// Checking: __fp16 is extended to double when calling variadic functions
+void variadic(int a, ...);
+void f43(__fp16 *in) {
+ variadic(42, *in);
+// CHECK: call void @variadic(i32 42, double
+}
diff --git a/test/CodeGen/aarch64-inline-asm.c b/test/CodeGen/aarch64-inline-asm.c
new file mode 100644
index 0000000000..ca39c6e7ff
--- /dev/null
+++ b/test/CodeGen/aarch64-inline-asm.c
@@ -0,0 +1,56 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -o - %s | FileCheck %s
+
+// The only part clang really deals with is the lvalue/rvalue
+// distinction on constraints. It's sufficient to emit llvm and make
+// sure that's sane.
+
+long var;
+
+void test_generic_constraints(int var32, long var64) {
+ asm("add %0, %1, %1" : "=r"(var32) : "0"(var32));
+// CHECK: [[R32_ARG:%[a-zA-Z0-9]+]] = load i32*
+// CHECK: call i32 asm "add $0, $1, $1", "=r,0"(i32 [[R32_ARG]])
+
+ asm("add %0, %1, %1" : "=r"(var64) : "0"(var64));
+// CHECK: [[R32_ARG:%[a-zA-Z0-9]+]] = load i64*
+// CHECK: call i64 asm "add $0, $1, $1", "=r,0"(i64 [[R32_ARG]])
+
+ asm("ldr %0, %1" : "=r"(var32) : "m"(var));
+ asm("ldr %0, [%1]" : "=r"(var64) : "r"(&var));
+// CHECK: call i32 asm "ldr $0, $1", "=r,*m"(i64* @var)
+// CHECK: call i64 asm "ldr $0, [$1]", "=r,r"(i64* @var)
+}
+
+float f;
+double d;
+void test_constraint_w() {
+ asm("fadd %s0, %s1, %s1" : "=w"(f) : "w"(f));
+// CHECK: [[FLT_ARG:%[a-zA-Z_0-9]+]] = load float* @f
+// CHECK: call float asm "fadd ${0:s}, ${1:s}, ${1:s}", "=w,w"(float [[FLT_ARG]])
+
+ asm("fadd %d0, %d1, %d1" : "=w"(d) : "w"(d));
+// CHECK: [[DBL_ARG:%[a-zA-Z_0-9]+]] = load double* @d
+// CHECK: call double asm "fadd ${0:d}, ${1:d}, ${1:d}", "=w,w"(double [[DBL_ARG]])
+}
+
+void test_constraints_immed(void) {
+ asm("add x0, x0, %0" : : "I"(4095) : "x0");
+ asm("and w0, w0, %0" : : "K"(0xaaaaaaaa) : "w0");
+ asm("and x0, x0, %0" : : "L"(0xaaaaaaaaaaaaaaaa) : "x0");
+// CHECK: call void asm sideeffect "add x0, x0, $0", "I,~{x0}"(i32 4095)
+// CHECK: call void asm sideeffect "and w0, w0, $0", "K,~{w0}"(i32 -1431655766)
+// CHECK: call void asm sideeffect "and x0, x0, $0", "L,~{x0}"(i64 -6148914691236517206)
+}
+
+void test_constraint_S(void) {
+ int *addr;
+ asm("adrp %0, %A1\n\t"
+ "add %0, %0, %L1" : "=r"(addr) : "S"(&var));
+// CHECK: call i32* asm "adrp $0, ${1:A}\0A\09add $0, $0, ${1:L}", "=r,S"(i64* @var)
+}
+
+void test_constraint_Q(void) {
+ int val;
+ asm("ldxr %0, %1" : "=r"(val) : "Q"(var));
+// CHECK: call i32 asm "ldxr $0, $1", "=r,*Q"(i64* @var)
+}
diff --git a/test/CodeGen/aarch64-type-sizes.c b/test/CodeGen/aarch64-type-sizes.c
new file mode 100644
index 0000000000..3b9c9fc426
--- /dev/null
+++ b/test/CodeGen/aarch64-type-sizes.c
@@ -0,0 +1,90 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -w -o - %s | FileCheck %s
+
+// char by definition has size 1
+
+int check_short() {
+ return sizeof(short);
+// CHECK: ret i32 2
+}
+
+int check_int() {
+ return sizeof(int);
+// CHECK: ret i32 4
+}
+
+int check_long() {
+// Both 4 and 8 are permitted under the PCS, Linux says 8!
+ return sizeof(long);
+// CHECK: ret i32 8
+}
+
+int check_longlong() {
+ return sizeof(long long);
+// CHECK: ret i32 8
+}
+
+int check_int128() {
+ return sizeof(__int128);
+// CHECK: ret i32 16
+}
+
+int check_fp16() {
+ return sizeof(__fp16);
+// CHECK: ret i32 2
+}
+
+int check_float() {
+ return sizeof(float);
+// CHECK: ret i32 4
+}
+
+int check_double() {
+ return sizeof(double);
+// CHECK: ret i32 8
+}
+
+int check_longdouble() {
+ return sizeof(long double);
+// CHECK: ret i32 16
+}
+
+int check_floatComplex() {
+ return sizeof(float _Complex);
+// CHECK: ret i32 8
+}
+
+int check_doubleComplex() {
+ return sizeof(double _Complex);
+// CHECK: ret i32 16
+}
+
+int check_longdoubleComplex() {
+ return sizeof(long double _Complex);
+// CHECK: ret i32 32
+}
+
+int check_bool() {
+ return sizeof(_Bool);
+// CHECK: ret i32 1
+}
+
+int check_wchar() {
+// PCS allows either unsigned short or unsigned int. Linux again says "bigger!"
+ return sizeof(__WCHAR_TYPE__);
+// CHECK: ret i32 4
+}
+
+int check_wchar_unsigned() {
+ return (__WCHAR_TYPE__)-1 > (__WCHAR_TYPE__)0;
+// CHECK: ret i32 1
+}
+
+enum Small {
+ Item
+};
+
+int foo() {
+ return sizeof(enum Small);
+// CHECK: ret i32 4
+}
+
diff --git a/test/CodeGen/aarch64-varargs.c b/test/CodeGen/aarch64-varargs.c
new file mode 100644
index 0000000000..324a070827
--- /dev/null
+++ b/test/CodeGen/aarch64-varargs.c
@@ -0,0 +1,238 @@
+// RUN: %clang_cc1 -triple aarch64 -emit-llvm -o - %s | FileCheck %s
+#include <stdarg.h>
+
+// Obviously there's more than one way to implement va_arg. This test should at
+// least prevent unintentional regressions caused by refactoring.
+
+va_list the_list;
+
+int simple_int(void) {
+// CHECK: define i32 @simple_int
+ return va_arg(the_list, int);
+// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0
+// CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]]
+
+// CHECK: [[VAARG_MAYBE_REG]]
+// CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 8
+// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0
+// CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]]
+
+// CHECK: [[VAARG_IN_REG]]
+// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 1)
+// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr i8* [[REG_TOP]], i32 [[GR_OFFS]]
+// CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to i32*
+// CHECK: br label %[[VAARG_END:[a-z._0-9]+]]
+
+// CHECK: [[VAARG_ON_STACK]]
+// CHECK: [[STACK:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr i8* [[STACK]], i32 8
+// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to i32*
+// CHECK: br label %[[VAARG_END]]
+
+// CHECK: [[VAARG_END]]
+// CHECK: [[ADDR:%[a-z._0-9]+]] = phi i32* [ [[FROMREG_ADDR]], %[[VAARG_IN_REG]] ], [ [[FROMSTACK_ADDR]], %[[VAARG_ON_STACK]] ]
+// CHECK: [[RESULT:%[a-z_0-9]+]] = load i32* [[ADDR]]
+// CHECK: ret i32 [[RESULT]]
+}
+
+__int128 aligned_int(void) {
+// CHECK: define i128 @aligned_int
+ return va_arg(the_list, __int128);
+// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0
+// CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]]
+
+// CHECK: [[VAARG_MAYBE_REG]]
+// CHECK: [[ALIGN_REGOFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 15
+// CHECK: [[ALIGNED_REGOFFS:%[a-z_0-9]+]] = and i32 [[ALIGN_REGOFFS]], -16
+// CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[ALIGNED_REGOFFS]], 16
+// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0
+// CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]]
+
+// CHECK: [[VAARG_IN_REG]]
+// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 1)
+// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr i8* [[REG_TOP]], i32 [[ALIGNED_REGOFFS]]
+// CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to i128*
+// CHECK: br label %[[VAARG_END:[a-z._0-9]+]]
+
+// CHECK: [[VAARG_ON_STACK]]
+// CHECK: [[STACK:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[STACKINT:%[a-z_0-9]+]] = ptrtoint i8* [[STACK]] to i64
+// CHECK: [[ALIGN_STACK:%[a-z_0-9]+]] = add i64 [[STACKINT]], 15
+// CHECK: [[ALIGNED_STACK_INT:%[a-z_0-9]+]] = and i64 [[ALIGN_STACK]], -16
+// CHECK: [[ALIGNED_STACK_PTR:%[a-z_0-9]+]] = inttoptr i64 [[ALIGNED_STACK_INT]] to i8*
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr i8* [[ALIGNED_STACK_PTR]], i32 16
+// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[ALIGNED_STACK_PTR]] to i128*
+// CHECK: br label %[[VAARG_END]]
+
+// CHECK: [[VAARG_END]]
+// CHECK: [[ADDR:%[a-z._0-9]+]] = phi i128* [ [[FROMREG_ADDR]], %[[VAARG_IN_REG]] ], [ [[FROMSTACK_ADDR]], %[[VAARG_ON_STACK]] ]
+// CHECK: [[RESULT:%[a-z_0-9]+]] = load i128* [[ADDR]]
+// CHECK: ret i128 [[RESULT]]
+}
+
+struct bigstruct {
+ int a[10];
+};
+
+struct bigstruct simple_indirect(void) {
+// CHECK: define void @simple_indirect
+ return va_arg(the_list, struct bigstruct);
+// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0
+// CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]]
+
+// CHECK: [[VAARG_MAYBE_REG]]
+// CHECK-NOT: and i32
+// CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 8
+// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0
+// CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]]
+
+// CHECK: [[VAARG_IN_REG]]
+// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 1)
+// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr i8* [[REG_TOP]], i32 [[GR_OFFS]]
+// CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to %struct.bigstruct**
+// CHECK: br label %[[VAARG_END:[a-z._0-9]+]]
+
+// CHECK: [[VAARG_ON_STACK]]
+// CHECK: [[STACK:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK-NOT: and i64
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr i8* [[STACK]], i32 8
+// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to %struct.bigstruct**
+// CHECK: br label %[[VAARG_END]]
+
+// CHECK: [[VAARG_END]]
+// CHECK: [[ADDR:%[a-z._0-9]+]] = phi %struct.bigstruct** [ [[FROMREG_ADDR]], %[[VAARG_IN_REG]] ], [ [[FROMSTACK_ADDR]], %[[VAARG_ON_STACK]] ]
+// CHECK: load %struct.bigstruct** [[ADDR]]
+}
+
+struct aligned_bigstruct {
+ float a;
+ long double b;
+};
+
+struct aligned_bigstruct simple_aligned_indirect(void) {
+// CHECK: define void @simple_aligned_indirect
+ return va_arg(the_list, struct aligned_bigstruct);
+// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0
+// CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]]
+
+// CHECK: [[VAARG_MAYBE_REG]]
+// CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 8
+// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0
+// CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]]
+
+// CHECK: [[VAARG_IN_REG]]
+// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 1)
+// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr i8* [[REG_TOP]], i32 [[GR_OFFS]]
+// CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to %struct.aligned_bigstruct**
+// CHECK: br label %[[VAARG_END:[a-z._0-9]+]]
+
+// CHECK: [[VAARG_ON_STACK]]
+// CHECK: [[STACK:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr i8* [[STACK]], i32 8
+// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to %struct.aligned_bigstruct**
+// CHECK: br label %[[VAARG_END]]
+
+// CHECK: [[VAARG_END]]
+// CHECK: [[ADDR:%[a-z._0-9]+]] = phi %struct.aligned_bigstruct** [ [[FROMREG_ADDR]], %[[VAARG_IN_REG]] ], [ [[FROMSTACK_ADDR]], %[[VAARG_ON_STACK]] ]
+// CHECK: load %struct.aligned_bigstruct** [[ADDR]]
+}
+
+double simple_double(void) {
+// CHECK: define double @simple_double
+ return va_arg(the_list, double);
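+// Doubles live in the FP/SIMD register save area, so the sequence below uses
+// __vr_offs (field 4) and __vr_top (field 2) rather than the GP fields, and
+// each vector register slot is 16 bytes wide, hence the increment of 16.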
+// CHECK: [[VR_OFFS:%[a-z_0-9]+]] = load i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 4)
+// CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[VR_OFFS]], 0
+// CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK]], label %[[VAARG_MAYBE_REG]]
+
+// CHECK: [[VAARG_MAYBE_REG]]
+// CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[VR_OFFS]], 16
+// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 4)
+// CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0
+// CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]]
+
+// CHECK: [[VAARG_IN_REG]]
+// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 2)
+// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr i8* [[REG_TOP]], i32 [[VR_OFFS]]
+// CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to double*
+// CHECK: br label %[[VAARG_END]]
+
+// CHECK: [[VAARG_ON_STACK]]
+// CHECK: [[STACK:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr i8* [[STACK]], i32 8
+// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to double*
+// CHECK: br label %[[VAARG_END]]
+
+// CHECK: [[VAARG_END]]
+// CHECK: [[ADDR:%[a-z._0-9]+]] = phi double* [ [[FROMREG_ADDR]], %[[VAARG_IN_REG]] ], [ [[FROMSTACK_ADDR]], %[[VAARG_ON_STACK]] ]
+// CHECK: [[RESULT:%[a-z_0-9]+]] = load double* [[ADDR]]
+// CHECK: ret double [[RESULT]]
+}
+
+struct hfa {
+ float a, b;
+};
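+// Two floats make this a Homogeneous Floating-point Aggregate: each member is
+// allocated its own FP register, so the in-register path below gathers the
+// elements from offsets 0 and 16 of the save area and bumps __vr_offs by 32.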
+
+struct hfa simple_hfa(void) {
+// CHECK: define %struct.hfa @simple_hfa
+ return va_arg(the_list, struct hfa);
+// CHECK: [[VR_OFFS:%[a-z_0-9]+]] = load i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 4)
+// CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[VR_OFFS]], 0
+// CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]]
+
+// CHECK: [[VAARG_MAYBE_REG]]
+// CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[VR_OFFS]], 32
+// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 4)
+// CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0
+// CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]]
+
+// CHECK: [[VAARG_IN_REG]]
+// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 2)
+// CHECK: [[FIRST_REG:%[a-z_0-9]+]] = getelementptr i8* [[REG_TOP]], i32 [[VR_OFFS]]
+// CHECK: [[EL_ADDR:%[a-z_0-9]+]] = getelementptr i8* [[FIRST_REG]], i32 0
+// CHECK: [[EL_TYPED:%[a-z_0-9]+]] = bitcast i8* [[EL_ADDR]] to float*
+// CHECK: [[EL_TMPADDR:%[a-z_0-9]+]] = getelementptr inbounds [2 x float]* %[[TMP_HFA:[a-z_.0-9]+]], i32 0, i32 0
+// CHECK: [[EL:%[a-z_0-9]+]] = load float* [[EL_TYPED]]
+// CHECK: store float [[EL]], float* [[EL_TMPADDR]]
+// CHECK: [[EL_ADDR:%[a-z_0-9]+]] = getelementptr i8* [[FIRST_REG]], i32 16
+// CHECK: [[EL_TYPED:%[a-z_0-9]+]] = bitcast i8* [[EL_ADDR]] to float*
+// CHECK: [[EL_TMPADDR:%[a-z_0-9]+]] = getelementptr inbounds [2 x float]* %[[TMP_HFA]], i32 0, i32 1
+// CHECK: [[EL:%[a-z_0-9]+]] = load float* [[EL_TYPED]]
+// CHECK: store float [[EL]], float* [[EL_TMPADDR]]
+// CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast [2 x float]* %[[TMP_HFA]] to %struct.hfa*
+// CHECK: br label %[[VAARG_END:[a-z_.0-9]+]]
+
+// CHECK: [[VAARG_ON_STACK]]
+// CHECK: [[STACK:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr i8* [[STACK]], i32 8
+// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to %struct.hfa*
+// CHECK: br label %[[VAARG_END]]
+
+// CHECK: [[VAARG_END]]
+// CHECK: [[ADDR:%[a-z._0-9]+]] = phi %struct.hfa* [ [[FROMREG_ADDR]], %[[VAARG_IN_REG]] ], [ [[FROMSTACK_ADDR]], %[[VAARG_ON_STACK]] ]
+}
+
+void check_start(int n, ...) {
+// CHECK: define void @check_start(i32 %n, ...)
+
+ va_list the_list;
+ va_start(the_list, n);
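+// va_start simply hands the alloca'd __va_list to the llvm.va_start intrinsic;
+// populating its five fields is left to the backend's lowering of the call.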
+// CHECK: [[THE_LIST:%[a-z_0-9]+]] = alloca %struct.__va_list
+// CHECK: [[VOIDP_THE_LIST:%[a-z_0-9]+]] = bitcast %struct.__va_list* [[THE_LIST]] to i8*
+// CHECK: call void @llvm.va_start(i8* [[VOIDP_THE_LIST]])
+}
+
+
diff --git a/test/CodeGenCXX/aarch64-arguments.cpp b/test/CodeGenCXX/aarch64-arguments.cpp
new file mode 100644
index 0000000000..f56ad0bbdc
--- /dev/null
+++ b/test/CodeGenCXX/aarch64-arguments.cpp
@@ -0,0 +1,5 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux -emit-llvm -w -o - %s | FileCheck -check-prefix=PCS %s
+
+// PCS: define void @{{.*}}(i8 %a
+struct s0 {};
+void f0(s0 a) {}
diff --git a/test/CodeGenCXX/aarch64-cxxabi.cpp b/test/CodeGenCXX/aarch64-cxxabi.cpp
new file mode 100644
index 0000000000..04d9493ae6
--- /dev/null
+++ b/test/CodeGenCXX/aarch64-cxxabi.cpp
@@ -0,0 +1,96 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -w -o - %s | FileCheck %s
+
+// Check differences between the generic Itanium ABI, the AArch32 version and
+// the AArch64 version.
+
+////////////////////////////////////////////////////////////////////////////////
+
+// The ABI says that the key function is the "textually first, non-inline,
+// non-pure, virtual member function". The generic version decides this after
+// the completion of the class definition; the AArch32 version decides this at
+// the end of the translation unit.
+
+// We construct a class which needs a VTable emitted here under the generic
+// ABI, but not under AArch32.
+
+// (see next section for explanation of guard)
+// CHECK: @_ZGVZ15guard_variablesiE4mine = internal global i64 0
+
+// CHECK: @_ZTV16CheckKeyFunction =
+struct CheckKeyFunction {
+ virtual void foo();
+};
+
+// This is not inline when CheckKeyFunction is completed, so
+// CheckKeyFunction::foo is the key function. VTables should be emitted.
+inline void CheckKeyFunction::foo() {
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Guard variables use only the low bit to record initialisation status, rather
+// than the low byte as in the generic Itanium ABI. However, unlike 32-bit ARM,
+// they *are* 64 bits wide, so check that too in case the two schemes have been
+// confused.
+
+class Guarded {
+public:
+ Guarded(int i);
+ ~Guarded();
+};
+
+void guard_variables(int a) {
+ static Guarded mine(a);
+// CHECK: [[GUARDBIT:%[0-9]+]] = and i64 {{%[0-9]+}}, 1
+// CHECK: icmp eq i64 [[GUARDBIT]], 0
+
+ // As guards are 64-bit, these helpers should take 64-bit pointers.
+// CHECK: call i32 @__cxa_guard_acquire(i64*
+// CHECK: call void @__cxa_guard_release(i64*
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Member function pointers use the adj field, rather than ptr, to distinguish
+// between virtual and non-virtual members. As a result the this-adjustment is
+// stored shifted left by one (if ptr carried the flag, a mask on ptr would be
+// expected here instead).
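+//
+// Concretely, in the ARM-style layout a member function pointer is
+// { i64 ptr, i64 adj } with the virtual flag in bit 0 of adj and the real
+// this-adjustment in adj >> 1, which is what the ashr/and sequence below
+// recovers.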
+
+class C {
+ int a();
+ virtual int b();
+};
+
+
+int member_pointer(C &c, int (C::*func)()) {
+// CHECK: ashr i64 %[[MEMPTRADJ:[0-9a-z.]+]], 1
+// CHECK: %[[ISVIRTUAL:[0-9]+]] = and i64 %[[MEMPTRADJ]], 1
+// CHECK: icmp ne i64 %[[ISVIRTUAL]], 0
+ return (c.*func)();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+// The AArch64 PCS says that the va_list type is based on "struct __va_list ..."
+// in the std namespace, which means it should mangle as "St9__va_list".
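+// ("St" abbreviates the std namespace and "9__va_list" is the length-prefixed
+// source name, following the Itanium mangling scheme.)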
+
+// CHECK: @_Z7va_funcSt9__va_list
+void va_func(__builtin_va_list l) {
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+// AArch64 constructors (like generic Itanium, but unlike AArch32) do not return
+// "this".
+
+void test_constructor() {
+ Guarded g(42);
+// CHECK: call void @_ZN7GuardedC1Ei
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+// In principle the AArch32 ABI allows the destructor of a static object to be
+// registered via a call to __aeabi_atexit instead of __cxa_atexit. Clang doesn't
+// make use of this at the moment, but it's definitely not allowed for AArch64.
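+// (__aeabi_atexit takes the same three arguments as __cxa_atexit, just with the
+// object and destructor swapped.)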
+
+// CHECK: call i32 @__cxa_atexit
+Guarded g(42);
diff --git a/test/Driver/aarch64-features.c b/test/Driver/aarch64-features.c
new file mode 100644
index 0000000000..2acb7157f6
--- /dev/null
+++ b/test/Driver/aarch64-features.c
@@ -0,0 +1,5 @@
+// RUN: %clang -target aarch64-none-linux-gnu -### %s -fsyntax-only 2>&1 | FileCheck %s
+
+// The AArch64 PCS states that chars should be unsigned.
+// CHECK: fno-signed-char
+
diff --git a/test/Preprocessor/aarch64-target-features.c b/test/Preprocessor/aarch64-target-features.c
new file mode 100644
index 0000000000..65104e3311
--- /dev/null
+++ b/test/Preprocessor/aarch64-target-features.c
@@ -0,0 +1,30 @@
+// RUN: %clang -target aarch64-none-linux-gnu -x c -E -dM %s -o - | FileCheck %s
+// CHECK: __AARCH 8
+// CHECK: __AARCH64EL__
+// CHECK: __AARCH_ACLE 101
+// CHECK-NOT: __AARCH_ADVSIMD_FP
+// CHECK-NOT: __AARCH_FEATURE_ADVSIMD
+// CHECK-NOT: __AARCH_FEATURE_BIG_ENDIAN
+// CHECK: __AARCH_FEATURE_CLZ 1
+// CHECK: __AARCH_FEATURE_FMA 1
+// CHECK: __AARCH_FEATURE_LDREX 0xf
+// CHECK: __AARCH_FEATURE_UNALIGNED 1
+// CHECK: __AARCH_FP 0xe
+// CHECK-NOT: __AARCH_FP_FAST
+// CHECK: __AARCH_FP16_FORMAT_IEEE 1
+// CHECK: __AARCH_FP_FENV_ROUNDING 1
+// CHECK: __AARCH_PROFILE 'A'
+// CHECK: __AARCH_SIZEOF_MINIMAL_ENUM 4
+// CHECK: __AARCH_SIZEOF_WCHAR_T 4
+// CHECK: __aarch64__
+
+
+// RUN: %clang -target aarch64-none-linux-gnu -ffast-math -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-FASTMATH %s
+// CHECK-FASTMATH: __AARCH_FP_FAST
+
+// RUN: %clang -target aarch64-none-linux-gnu -fshort-wchar -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SHORTWCHAR %s
+// CHECK-SHORTWCHAR: __AARCH_SIZEOF_WCHAR_T 2
+
+// RUN: %clang -target aarch64-none-linux-gnu -fshort-enums -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SHORTENUMS %s
+// CHECK-SHORTENUMS: __AARCH_SIZEOF_MINIMAL_ENUM 1
+