-rw-r--r-- | include/clang/Basic/TargetCXXABI.h | 12
-rw-r--r-- | include/clang/Basic/TargetInfo.h | 4
-rw-r--r-- | lib/AST/ASTContext.cpp | 83
-rw-r--r-- | lib/Basic/Targets.cpp | 188
-rw-r--r-- | lib/CodeGen/ABIInfo.h | 6
-rw-r--r-- | lib/CodeGen/CodeGenModule.cpp | 1
-rw-r--r-- | lib/CodeGen/ItaniumCXXABI.cpp | 14
-rw-r--r-- | lib/CodeGen/TargetInfo.cpp | 418
-rw-r--r-- | lib/Driver/ToolChains.cpp | 25
-rw-r--r-- | lib/Driver/ToolChains.h | 3
-rw-r--r-- | lib/Driver/Tools.cpp | 5
-rw-r--r-- | test/CodeGen/aarch64-arguments.c | 194
-rw-r--r-- | test/CodeGen/aarch64-inline-asm.c | 56
-rw-r--r-- | test/CodeGen/aarch64-type-sizes.c | 90
-rw-r--r-- | test/CodeGen/aarch64-varargs.c | 238
-rw-r--r-- | test/CodeGenCXX/aarch64-arguments.cpp | 5
-rw-r--r-- | test/CodeGenCXX/aarch64-cxxabi.cpp | 96
-rw-r--r-- | test/Driver/aarch64-features.c | 5
-rw-r--r-- | test/Preprocessor/aarch64-target-features.c | 30 |
19 files changed, 1467 insertions, 6 deletions
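For reference, the AArch64 va_list type that this patch teaches Clang about (see CreateAArch64ABIBuiltinVaListDecl and AArch64ABIInfo::EmitVAArg in the diff below) follows the layout in section B.4 of the AAPCS64. A plain-C sketch of that layout, not itself part of the diff:

    struct __va_list {
      void *__stack;   /* next stacked (memory) argument */
      void *__gr_top;  /* end of the general-purpose register save area */
      void *__vr_top;  /* end of the FP/SIMD register save area */
      int   __gr_offs; /* byte offset from __gr_top to the next saved GP register;
                          becomes >= 0 once GP arguments are taken from the stack */
      int   __vr_offs; /* likewise for the FP/SIMD register save area */
    };

In C++ mode the patch declares the same struct inside namespace std, so that it mangles as St9__va_list as the PCS requires.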
diff --git a/include/clang/Basic/TargetCXXABI.h b/include/clang/Basic/TargetCXXABI.h index 3c6677c78a..c9d28f8774 100644 --- a/include/clang/Basic/TargetCXXABI.h +++ b/include/clang/Basic/TargetCXXABI.h @@ -63,6 +63,14 @@ public: /// - constructor/destructor signatures. iOS, + /// The generic AArch64 ABI is also a modified version of the Itanium ABI, + /// but it has fewer divergences than the 32-bit ARM ABI. + /// + /// The relevant changes from the generic ABI in this case are: + /// - representation of member function pointers adjusted as in ARM. + /// - guard variables are smaller. + GenericAArch64, + /// The Microsoft ABI is the ABI used by Microsoft Visual Studio (and /// compatible compilers). /// @@ -93,6 +101,7 @@ public: /// \brief Does this ABI generally fall into the Itanium family of ABIs? bool isItaniumFamily() const { switch (getKind()) { + case GenericAArch64: case GenericItanium: case GenericARM: case iOS: @@ -107,6 +116,7 @@ public: /// \brief Is this ABI an MSVC-compatible ABI? bool isMicrosoft() const { switch (getKind()) { + case GenericAArch64: case GenericItanium: case GenericARM: case iOS: @@ -175,6 +185,7 @@ public: case GenericARM: return false; + case GenericAArch64: case GenericItanium: case iOS: // old iOS compilers did not follow this rule case Microsoft: @@ -220,6 +231,7 @@ public: // permanently locked the definition of POD to the rules of C++ TR1, // and that trickles down to all the derived ABIs. case GenericItanium: + case GenericAArch64: case GenericARM: case iOS: return UseTailPaddingUnlessPOD03; diff --git a/include/clang/Basic/TargetInfo.h b/include/clang/Basic/TargetInfo.h index 716d776df6..deaa3eeb77 100644 --- a/include/clang/Basic/TargetInfo.h +++ b/include/clang/Basic/TargetInfo.h @@ -136,6 +136,10 @@ public: /// typedef void* __builtin_va_list; VoidPtrBuiltinVaList, + /// __builtin_va_list as defind by the AArch64 ABI + /// http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055a/IHI0055A_aapcs64.pdf + AArch64ABIBuiltinVaList, + /// __builtin_va_list as defined by the PNaCl ABI: /// http://www.chromium.org/nativeclient/pnacl/bitcode-abi#TOC-Machine-Types PNaClABIBuiltinVaList, diff --git a/lib/AST/ASTContext.cpp b/lib/AST/ASTContext.cpp index 8091d685f7..5d15573038 100644 --- a/lib/AST/ASTContext.cpp +++ b/lib/AST/ASTContext.cpp @@ -597,6 +597,7 @@ CXXABI *ASTContext::createCXXABI(const TargetInfo &T) { case TargetCXXABI::GenericARM: case TargetCXXABI::iOS: return CreateARMCXXABI(*this); + case TargetCXXABI::GenericAArch64: // Same as Itanium at this level case TargetCXXABI::GenericItanium: return CreateItaniumCXXABI(*this); case TargetCXXABI::Microsoft: @@ -5563,6 +5564,85 @@ static TypedefDecl *CreateVoidPtrBuiltinVaListDecl(const ASTContext *Context) { return VaListTypeDecl; } +static TypedefDecl * +CreateAArch64ABIBuiltinVaListDecl(const ASTContext *Context) { + RecordDecl *VaListTagDecl; + if (Context->getLangOpts().CPlusPlus) { + // namespace std { struct __va_list { + NamespaceDecl *NS; + NS = NamespaceDecl::Create(const_cast<ASTContext &>(*Context), + Context->getTranslationUnitDecl(), + /*Inline*/false, SourceLocation(), + SourceLocation(), &Context->Idents.get("std"), + /*PrevDecl*/0); + + VaListTagDecl = CXXRecordDecl::Create(*Context, TTK_Struct, + Context->getTranslationUnitDecl(), + SourceLocation(), SourceLocation(), + &Context->Idents.get("__va_list")); + VaListTagDecl->setDeclContext(NS); + } else { + // struct __va_list + VaListTagDecl = CreateRecordDecl(*Context, TTK_Struct, + Context->getTranslationUnitDecl(), + 
&Context->Idents.get("__va_list")); + } + + VaListTagDecl->startDefinition(); + + const size_t NumFields = 5; + QualType FieldTypes[NumFields]; + const char *FieldNames[NumFields]; + + // void *__stack; + FieldTypes[0] = Context->getPointerType(Context->VoidTy); + FieldNames[0] = "__stack"; + + // void *__gr_top; + FieldTypes[1] = Context->getPointerType(Context->VoidTy); + FieldNames[1] = "__gr_top"; + + // void *__vr_top; + FieldTypes[2] = Context->getPointerType(Context->VoidTy); + FieldNames[2] = "__vr_top"; + + // int __gr_offs; + FieldTypes[3] = Context->IntTy; + FieldNames[3] = "__gr_offs"; + + // int __vr_offs; + FieldTypes[4] = Context->IntTy; + FieldNames[4] = "__vr_offs"; + + // Create fields + for (unsigned i = 0; i < NumFields; ++i) { + FieldDecl *Field = FieldDecl::Create(const_cast<ASTContext &>(*Context), + VaListTagDecl, + SourceLocation(), + SourceLocation(), + &Context->Idents.get(FieldNames[i]), + FieldTypes[i], /*TInfo=*/0, + /*BitWidth=*/0, + /*Mutable=*/false, + ICIS_NoInit); + Field->setAccess(AS_public); + VaListTagDecl->addDecl(Field); + } + VaListTagDecl->completeDefinition(); + QualType VaListTagType = Context->getRecordType(VaListTagDecl); + Context->VaListTagTy = VaListTagType; + + // } __builtin_va_list; + TypedefDecl *VaListTypedefDecl + = TypedefDecl::Create(const_cast<ASTContext &>(*Context), + Context->getTranslationUnitDecl(), + SourceLocation(), SourceLocation(), + &Context->Idents.get("__builtin_va_list"), + Context->getTrivialTypeSourceInfo(VaListTagType)); + + return VaListTypedefDecl; +} + static TypedefDecl *CreatePowerABIBuiltinVaListDecl(const ASTContext *Context) { // typedef struct __va_list_tag { RecordDecl *VaListTagDecl; @@ -5796,6 +5876,8 @@ static TypedefDecl *CreateVaListDecl(const ASTContext *Context, return CreateCharPtrBuiltinVaListDecl(Context); case TargetInfo::VoidPtrBuiltinVaList: return CreateVoidPtrBuiltinVaListDecl(Context); + case TargetInfo::AArch64ABIBuiltinVaList: + return CreateAArch64ABIBuiltinVaListDecl(Context); case TargetInfo::PowerABIBuiltinVaList: return CreatePowerABIBuiltinVaListDecl(Context); case TargetInfo::X86_64ABIBuiltinVaList: @@ -7642,6 +7724,7 @@ bool ASTContext::isNearlyEmpty(const CXXRecordDecl *RD) const { MangleContext *ASTContext::createMangleContext() { switch (Target->getCXXABI().getKind()) { + case TargetCXXABI::GenericAArch64: case TargetCXXABI::GenericItanium: case TargetCXXABI::GenericARM: case TargetCXXABI::iOS: diff --git a/lib/Basic/Targets.cpp b/lib/Basic/Targets.cpp index b90d49ec84..27984dfbf4 100644 --- a/lib/Basic/Targets.cpp +++ b/lib/Basic/Targets.cpp @@ -3020,6 +3020,186 @@ public: Int64Type = SignedLongLong; } }; +} + +namespace { +class AArch64TargetInfo : public TargetInfo { + static const char * const GCCRegNames[]; + static const TargetInfo::GCCRegAlias GCCRegAliases[]; +public: + AArch64TargetInfo(const std::string& triple) : TargetInfo(triple) { + BigEndian = false; + LongWidth = LongAlign = 64; + LongDoubleWidth = LongDoubleAlign = 128; + PointerWidth = PointerAlign = 64; + SuitableAlign = 128; + DescriptionString = "e-p:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-" + "i64:64:64-i128:128:128-f32:32:32-f64:64:64-" + "f128:128:128-n32:64-S128"; + + WCharType = UnsignedInt; + LongDoubleFormat = &llvm::APFloat::IEEEquad; + + TheCXXABI.set(TargetCXXABI::GenericAArch64); + } + virtual void getTargetDefines(const LangOptions &Opts, + MacroBuilder &Builder) const { + // GCC defines theses currently + Builder.defineMacro("__aarch64__"); + Builder.defineMacro("__AARCH64EL__"); + + // ACLE 
predefines. Many can only have one possible value on v8 AArch64. + + // FIXME: these were written based on an unreleased version of a 32-bit ACLE + // which was intended to be compatible with a 64-bit implementation. They + // will need updating when a real 64-bit ACLE exists. Particularly pressing + // instances are: __AARCH_ISA_A32, __AARCH_ISA_T32, __ARCH_PCS. + Builder.defineMacro("__AARCH_ACLE", "101"); + Builder.defineMacro("__AARCH", "8"); + Builder.defineMacro("__AARCH_PROFILE", "'A'"); + + Builder.defineMacro("__AARCH_FEATURE_UNALIGNED"); + Builder.defineMacro("__AARCH_FEATURE_CLZ"); + Builder.defineMacro("__AARCH_FEATURE_FMA"); + + // FIXME: ACLE 1.1 reserves bit 4. Will almost certainly come to mean + // 128-bit LDXP present, at which point this becomes 0x1f. + Builder.defineMacro("__AARCH_FEATURE_LDREX", "0xf"); + + // 0xe implies support for half, single and double precision operations. + Builder.defineMacro("__AARCH_FP", "0xe"); + + // PCS specifies this for SysV variants, which is all we support. Other ABIs + // may choose __AARCH_FP16_FORMAT_ALTERNATIVE. + Builder.defineMacro("__AARCH_FP16_FORMAT_IEEE"); + + if (Opts.FastMath || Opts.FiniteMathOnly) + Builder.defineMacro("__AARCH_FP_FAST"); + + if ((Opts.C99 || Opts.C11) && !Opts.Freestanding) + Builder.defineMacro("__AARCH_FP_FENV_ROUNDING"); + + Builder.defineMacro("__AARCH_SIZEOF_WCHAR_T", + Opts.ShortWChar ? "2" : "4"); + + Builder.defineMacro("__AARCH_SIZEOF_MINIMAL_ENUM", + Opts.ShortEnums ? "1" : "4"); + + if (BigEndian) + Builder.defineMacro("__AARCH_BIG_ENDIAN"); + } + virtual void getTargetBuiltins(const Builtin::Info *&Records, + unsigned &NumRecords) const { + Records = 0; + NumRecords = 0; + } + virtual bool hasFeature(StringRef Feature) const { + return Feature == "aarch64"; + } + virtual void getGCCRegNames(const char * const *&Names, + unsigned &NumNames) const; + virtual void getGCCRegAliases(const GCCRegAlias *&Aliases, + unsigned &NumAliases) const; + + virtual bool isCLZForZeroUndef() const { return false; } + + virtual bool validateAsmConstraint(const char *&Name, + TargetInfo::ConstraintInfo &Info) const { + switch (*Name) { + default: return false; + case 'w': // An FP/SIMD vector register + Info.setAllowsRegister(); + return true; + case 'I': // Constant that can be used with an ADD instruction + case 'J': // Constant that can be used with a SUB instruction + case 'K': // Constant that can be used with a 32-bit logical instruction + case 'L': // Constant that can be used with a 64-bit logical instruction + case 'M': // Constant that can be used as a 32-bit MOV immediate + case 'N': // Constant that can be used as a 64-bit MOV immediate + case 'Y': // Floating point constant zero + case 'Z': // Integer constant zero + return true; + case 'Q': // A memory reference with base register and no offset + Info.setAllowsMemory(); + return true; + case 'S': // A symbolic address + Info.setAllowsRegister(); + return true; + case 'U': + // Ump: A memory address suitable for ldp/stp in SI, DI, SF and DF modes, whatever they may be + // Utf: A memory address suitable for ldp/stp in TF mode, whatever it may be + // Usa: An absolute symbolic address + // Ush: The high part (bits 32:12) of a pc-relative symbolic address + llvm_unreachable("FIXME: Unimplemented support for bizarre constraints"); + } + } + + virtual const char *getClobbers() const { + // There are no AArch64 clobbers shared by all asm statements. 
+ return ""; + } + + virtual BuiltinVaListKind getBuiltinVaListKind() const { + return TargetInfo::AArch64ABIBuiltinVaList; + } +}; + +const char * const AArch64TargetInfo::GCCRegNames[] = { + "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7", + "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15", + "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23", + "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wsp", "wzr", + + "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", + "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", + "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", + "x24", "x25", "x26", "x27", "x28", "x29", "x30", "sp", "xzr", + + "b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7", + "b8", "b9", "b10", "b11", "b12", "b13", "b14", "b15", + "b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23", + "b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31", + + "h0", "h1", "h2", "h3", "h4", "h5", "h6", "h7", + "h8", "h9", "h10", "h11", "h12", "h13", "h14", "h15", + "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23", + "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31", + + "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", + "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15", + "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23", + "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", + + "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", + "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", + "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", + "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31", + + "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", + "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15", + "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23", + "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31" +}; + +void AArch64TargetInfo::getGCCRegNames(const char * const *&Names, + unsigned &NumNames) const { + Names = GCCRegNames; + NumNames = llvm::array_lengthof(GCCRegNames); +} + +const TargetInfo::GCCRegAlias AArch64TargetInfo::GCCRegAliases[] = { + { { "x16" }, "ip0"}, + { { "x17" }, "ip1"}, + { { "x29" }, "fp" }, + { { "x30" }, "lr" } +}; + +void AArch64TargetInfo::getGCCRegAliases(const GCCRegAlias *&Aliases, + unsigned &NumAliases) const { + Aliases = GCCRegAliases; + NumAliases = llvm::array_lengthof(GCCRegAliases); + +} } // end anonymous namespace namespace { @@ -4540,6 +4720,14 @@ static TargetInfo *AllocateTarget(const std::string &T) { case llvm::Triple::hexagon: return new HexagonTargetInfo(T); + case llvm::Triple::aarch64: + switch (os) { + case llvm::Triple::Linux: + return new LinuxTargetInfo<AArch64TargetInfo>(T); + default: + return new AArch64TargetInfo(T); + } + case llvm::Triple::arm: case llvm::Triple::thumb: if (Triple.isOSDarwin()) diff --git a/lib/CodeGen/ABIInfo.h b/lib/CodeGen/ABIInfo.h index 1da1689a65..10e2f60c86 100644 --- a/lib/CodeGen/ABIInfo.h +++ b/lib/CodeGen/ABIInfo.h @@ -102,8 +102,10 @@ namespace clang { return ABIArgInfo(Ignore, 0, 0, false, false, false, false, 0); } static ABIArgInfo getIndirect(unsigned Alignment, bool ByVal = true - , bool Realign = false) { - return ABIArgInfo(Indirect, 0, Alignment, ByVal, Realign, false, false, 0); + , bool Realign = false + , llvm::Type *Padding = 0) { + return ABIArgInfo(Indirect, 0, Alignment, ByVal, Realign, false, false, + Padding); } static ABIArgInfo getIndirectInReg(unsigned Alignment, bool ByVal = true , bool Realign = false) { diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp index 80b52978c7..91f22b1cd6 100644 --- 
a/lib/CodeGen/CodeGenModule.cpp +++ b/lib/CodeGen/CodeGenModule.cpp @@ -54,6 +54,7 @@ static const char AnnotationSection[] = "llvm.metadata"; static CGCXXABI &createCXXABI(CodeGenModule &CGM) { switch (CGM.getContext().getTargetInfo().getCXXABI().getKind()) { + case TargetCXXABI::GenericAArch64: case TargetCXXABI::GenericARM: case TargetCXXABI::iOS: case TargetCXXABI::GenericItanium: diff --git a/lib/CodeGen/ItaniumCXXABI.cpp b/lib/CodeGen/ItaniumCXXABI.cpp index fa25a2707d..a3d8cec344 100644 --- a/lib/CodeGen/ItaniumCXXABI.cpp +++ b/lib/CodeGen/ItaniumCXXABI.cpp @@ -181,6 +181,12 @@ CodeGen::CGCXXABI *CodeGen::CreateItaniumCXXABI(CodeGenModule &CGM) { case TargetCXXABI::iOS: return new ARMCXXABI(CGM); + // Note that AArch64 uses the generic ItaniumCXXABI class since it doesn't + // include the other 32-bit ARM oddities: constructor/destructor return values + // and array cookies. + case TargetCXXABI::GenericAArch64: + return new ItaniumCXXABI(CGM, /*IsARM = */ true); + case TargetCXXABI::GenericItanium: return new ItaniumCXXABI(CGM); @@ -1015,8 +1021,9 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF, if (useInt8GuardVariable) { guardTy = CGF.Int8Ty; } else { - // Guard variables are 64 bits in the generic ABI and 32 bits on ARM. - guardTy = (IsARM ? CGF.Int32Ty : CGF.Int64Ty); + // Guard variables are 64 bits in the generic ABI and size width on ARM + // (i.e. 32-bit on AArch32, 64-bit on AArch64). + guardTy = (IsARM ? CGF.SizeTy : CGF.Int64Ty); } llvm::PointerType *guardPtrTy = guardTy->getPointerTo(); @@ -1059,7 +1066,8 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF, // } if (IsARM && !useInt8GuardVariable) { llvm::Value *V = Builder.CreateLoad(guard); - V = Builder.CreateAnd(V, Builder.getInt32(1)); + llvm::Value *Test1 = llvm::ConstantInt::get(guardTy, 1); + V = Builder.CreateAnd(V, Test1); isInitialized = Builder.CreateIsNull(V, "guard.uninitialized"); // Itanium C++ ABI 3.3.2: diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp index a2e575d3f0..b870ae9eb1 100644 --- a/lib/CodeGen/TargetInfo.cpp +++ b/lib/CodeGen/TargetInfo.cpp @@ -95,6 +95,7 @@ unsigned TargetCodeGenInfo::getSizeOfUnwindException() const { // x86-32 FreeBSD, Linux, Darwin // PowerPC Linux, Darwin // ARM Darwin (*not* EABI) + // AArch64 Linux return 32; } @@ -3591,6 +3592,420 @@ llvm::Value *NaClARMABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty, } //===----------------------------------------------------------------------===// +// AArch64 ABI Implementation +//===----------------------------------------------------------------------===// + +namespace { + +class AArch64ABIInfo : public ABIInfo { +public: + AArch64ABIInfo(CodeGenTypes &CGT) : ABIInfo(CGT) {} + +private: + // The AArch64 PCS is explicit about return types and argument types being + // handled identically, so we don't need to draw a distinction between + // Argument and Return classification. 
+ ABIArgInfo classifyGenericType(QualType Ty, int &FreeIntRegs, + int &FreeVFPRegs) const; + + ABIArgInfo tryUseRegs(QualType Ty, int &FreeRegs, int RegsNeeded, bool IsInt, + llvm::Type *DirectTy = 0) const; + + virtual void computeInfo(CGFunctionInfo &FI) const; + + virtual llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty, + CodeGenFunction &CGF) const; +}; + +class AArch64TargetCodeGenInfo : public TargetCodeGenInfo { +public: + AArch64TargetCodeGenInfo(CodeGenTypes &CGT) + :TargetCodeGenInfo(new AArch64ABIInfo(CGT)) {} + + const AArch64ABIInfo &getABIInfo() const { + return static_cast<const AArch64ABIInfo&>(TargetCodeGenInfo::getABIInfo()); + } + + int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const { + return 31; + } + + bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, + llvm::Value *Address) const { + // 0-31 are x0-x30 and sp: 8 bytes each + llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8); + AssignToArrayRange(CGF.Builder, Address, Eight8, 0, 31); + + // 64-95 are v0-v31: 16 bytes each + llvm::Value *Sixteen8 = llvm::ConstantInt::get(CGF.Int8Ty, 16); + AssignToArrayRange(CGF.Builder, Address, Sixteen8, 64, 95); + + return false; + } + +}; + +} + +void AArch64ABIInfo::computeInfo(CGFunctionInfo &FI) const { + int FreeIntRegs = 8, FreeVFPRegs = 8; + + FI.getReturnInfo() = classifyGenericType(FI.getReturnType(), + FreeIntRegs, FreeVFPRegs); + + FreeIntRegs = FreeVFPRegs = 8; + for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end(); + it != ie; ++it) { + it->info = classifyGenericType(it->type, FreeIntRegs, FreeVFPRegs); + + } +} + +ABIArgInfo +AArch64ABIInfo::tryUseRegs(QualType Ty, int &FreeRegs, int RegsNeeded, + bool IsInt, llvm::Type *DirectTy) const { + if (FreeRegs >= RegsNeeded) { + FreeRegs -= RegsNeeded; + return ABIArgInfo::getDirect(DirectTy); + } + + llvm::Type *Padding = 0; + + // We need padding so that later arguments don't get filled in anyway. That + // wouldn't happen if only ByVal arguments followed in the same category, but + // a large structure will simply seem to be a pointer as far as LLVM is + // concerned. + if (FreeRegs > 0) { + if (IsInt) + Padding = llvm::Type::getInt64Ty(getVMContext()); + else + Padding = llvm::Type::getFloatTy(getVMContext()); + + // Either [N x i64] or [N x float]. + Padding = llvm::ArrayType::get(Padding, FreeRegs); + FreeRegs = 0; + } + + return ABIArgInfo::getIndirect(getContext().getTypeAlign(Ty) / 8, + /*IsByVal=*/ true, /*Realign=*/ false, + Padding); +} + + +ABIArgInfo AArch64ABIInfo::classifyGenericType(QualType Ty, + int &FreeIntRegs, + int &FreeVFPRegs) const { + // Can only occurs for return, but harmless otherwise. + if (Ty->isVoidType()) + return ABIArgInfo::getIgnore(); + + // Large vector types should be returned via memory. There's no such concept + // in the ABI, but they'd be over 16 bytes anyway so no matter how they're + // classified they'd go into memory (see B.3). + if (Ty->isVectorType() && getContext().getTypeSize(Ty) > 128) { + if (FreeIntRegs > 0) + --FreeIntRegs; + return ABIArgInfo::getIndirect(0, /*ByVal=*/false); + } + + // All non-aggregate LLVM types have a concrete ABI representation so they can + // be passed directly. After this block we're guaranteed to be in a + // complicated case. + if (!isAggregateTypeForABI(Ty)) { + // Treat an enum type as its underlying type. 
+ if (const EnumType *EnumTy = Ty->getAs<EnumType>()) + Ty = EnumTy->getDecl()->getIntegerType(); + + if (Ty->isFloatingType() || Ty->isVectorType()) + return tryUseRegs(Ty, FreeVFPRegs, /*RegsNeeded=*/ 1, /*IsInt=*/ false); + + assert(getContext().getTypeSize(Ty) <= 128 && + "unexpectedly large scalar type"); + + int RegsNeeded = getContext().getTypeSize(Ty) > 64 ? 2 : 1; + + // If the type may need padding registers to ensure "alignment", we must be + // careful when this is accounted for. Increasing the effective size covers + // all cases. + if (getContext().getTypeAlign(Ty) == 128) + RegsNeeded += FreeIntRegs % 2 != 0; + + return tryUseRegs(Ty, FreeIntRegs, RegsNeeded, /*IsInt=*/ true); + } + + // Structures with either a non-trivial destructor or a non-trivial + // copy constructor are always indirect. + if (isRecordWithNonTrivialDestructorOrCopyConstructor(Ty)) { + if (FreeIntRegs > 0) + --FreeIntRegs; + return ABIArgInfo::getIndirect(0, /*ByVal=*/false); + } + + if (isEmptyRecord(getContext(), Ty, true)) { + if (!getContext().getLangOpts().CPlusPlus) { + // Empty structs outside C++ mode are a GNU extension, so no ABI can + // possibly tell us what to do. It turns out (I believe) that GCC ignores + // the object for parameter-passsing purposes. + return ABIArgInfo::getIgnore(); + } + + // The combination of C++98 9p5 (sizeof(struct) != 0) and the pseudocode + // description of va_arg in the PCS require that an empty struct does + // actually occupy space for parameter-passing. I'm hoping for a + // clarification giving an explicit paragraph to point to in future. + return tryUseRegs(Ty, FreeIntRegs, /*RegsNeeded=*/ 1, /*IsInt=*/ true, + llvm::Type::getInt8Ty(getVMContext())); + } + + // Homogeneous vector aggregates get passed in registers or on the stack. + const Type *Base = 0; + uint64_t NumMembers = 0; + if (isHomogeneousAggregate(Ty, Base, getContext(), &NumMembers)) { + assert(Base && "Base class should be set for homogeneous aggregate"); + // Homogeneous aggregates are passed and returned directly. + return tryUseRegs(Ty, FreeVFPRegs, /*RegsNeeded=*/ NumMembers, + /*IsInt=*/ false); + } + + uint64_t Size = getContext().getTypeSize(Ty); + if (Size <= 128) { + // Small structs can use the same direct type whether they're in registers + // or on the stack. + llvm::Type *BaseTy; + unsigned NumBases; + int SizeInRegs = (Size + 63) / 64; + + if (getContext().getTypeAlign(Ty) == 128) { + BaseTy = llvm::Type::getIntNTy(getVMContext(), 128); + NumBases = 1; + + // If the type may need padding registers to ensure "alignment", we must + // be careful when this is accounted for. Increasing the effective size + // covers all cases. + SizeInRegs += FreeIntRegs % 2 != 0; + } else { + BaseTy = llvm::Type::getInt64Ty(getVMContext()); + NumBases = SizeInRegs; + } + llvm::Type *DirectTy = llvm::ArrayType::get(BaseTy, NumBases); + + return tryUseRegs(Ty, FreeIntRegs, /*RegsNeeded=*/ SizeInRegs, + /*IsInt=*/ true, DirectTy); + } + + // If the aggregate is > 16 bytes, it's passed and returned indirectly. In + // LLVM terms the return uses an "sret" pointer, but that's handled elsewhere. 
+ --FreeIntRegs; + return ABIArgInfo::getIndirect(0, /* byVal = */ false); +} + +llvm::Value *AArch64ABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty, + CodeGenFunction &CGF) const { + // The AArch64 va_list type and handling is specified in the Procedure Call + // Standard, section B.4: + // + // struct { + // void *__stack; + // void *__gr_top; + // void *__vr_top; + // int __gr_offs; + // int __vr_offs; + // }; + + assert(!CGF.CGM.getDataLayout().isBigEndian() + && "va_arg not implemented for big-endian AArch64"); + + int FreeIntRegs = 8, FreeVFPRegs = 8; + Ty = CGF.getContext().getCanonicalType(Ty); + ABIArgInfo AI = classifyGenericType(Ty, FreeIntRegs, FreeVFPRegs); + + llvm::BasicBlock *MaybeRegBlock = CGF.createBasicBlock("vaarg.maybe_reg"); + llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg"); + llvm::BasicBlock *OnStackBlock = CGF.createBasicBlock("vaarg.on_stack"); + llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end"); + + llvm::Value *reg_offs_p = 0, *reg_offs = 0; + int reg_top_index; + int RegSize; + if (FreeIntRegs < 8) { + assert(FreeVFPRegs == 8 && "Arguments never split between int & VFP regs"); + // 3 is the field number of __gr_offs + reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 3, "gr_offs_p"); + reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "gr_offs"); + reg_top_index = 1; // field number for __gr_top + RegSize = 8 * (8 - FreeIntRegs); + } else { + assert(FreeVFPRegs < 8 && "Argument must go in VFP or int regs"); + // 4 is the field number of __vr_offs. + reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 4, "vr_offs_p"); + reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "vr_offs"); + reg_top_index = 2; // field number for __vr_top + RegSize = 16 * (8 - FreeVFPRegs); + } + + //======================================= + // Find out where argument was passed + //======================================= + + // If reg_offs >= 0 we're already using the stack for this type of + // argument. We don't want to keep updating reg_offs (in case it overflows, + // though anyone passing 2GB of arguments, each at most 16 bytes, deserves + // whatever they get). + llvm::Value *UsingStack = 0; + UsingStack = CGF.Builder.CreateICmpSGE(reg_offs, + llvm::ConstantInt::get(CGF.Int32Ty, 0)); + + CGF.Builder.CreateCondBr(UsingStack, OnStackBlock, MaybeRegBlock); + + // Otherwise, at least some kind of argument could go in these registers, the + // quesiton is whether this particular type is too big. + CGF.EmitBlock(MaybeRegBlock); + + // Integer arguments may need to correct register alignment (for example a + // "struct { __int128 a; };" gets passed in x_2N, x_{2N+1}). In this case we + // align __gr_offs to calculate the potential address. + if (FreeIntRegs < 8 && AI.isDirect() && getContext().getTypeAlign(Ty) > 64) { + int Align = getContext().getTypeAlign(Ty) / 8; + + reg_offs = CGF.Builder.CreateAdd(reg_offs, + llvm::ConstantInt::get(CGF.Int32Ty, Align - 1), + "align_regoffs"); + reg_offs = CGF.Builder.CreateAnd(reg_offs, + llvm::ConstantInt::get(CGF.Int32Ty, -Align), + "aligned_regoffs"); + } + + // Update the gr_offs/vr_offs pointer for next call to va_arg on this va_list. + llvm::Value *NewOffset = 0; + NewOffset = CGF.Builder.CreateAdd(reg_offs, + llvm::ConstantInt::get(CGF.Int32Ty, RegSize), + "new_reg_offs"); + CGF.Builder.CreateStore(NewOffset, reg_offs_p); + + // Now we're in a position to decide whether this argument really was in + // registers or not. 
+ llvm::Value *InRegs = 0; + InRegs = CGF.Builder.CreateICmpSLE(NewOffset, + llvm::ConstantInt::get(CGF.Int32Ty, 0), + "inreg"); + + CGF.Builder.CreateCondBr(InRegs, InRegBlock, OnStackBlock); + + //======================================= + // Argument was in registers + //======================================= + + // Now we emit the code for if the argument was originally passed in + // registers. First start the appropriate block: + CGF.EmitBlock(InRegBlock); + + llvm::Value *reg_top_p = 0, *reg_top = 0; + reg_top_p = CGF.Builder.CreateStructGEP(VAListAddr, reg_top_index, "reg_top_p"); + reg_top = CGF.Builder.CreateLoad(reg_top_p, "reg_top"); + llvm::Value *BaseAddr = CGF.Builder.CreateGEP(reg_top, reg_offs); + llvm::Value *RegAddr = 0; + llvm::Type *MemTy = llvm::PointerType::getUnqual(CGF.ConvertTypeForMem(Ty)); + + if (!AI.isDirect()) { + // If it's been passed indirectly (actually a struct), whatever we find from + // stored registers or on the stack will actually be a struct **. + MemTy = llvm::PointerType::getUnqual(MemTy); + } + + const Type *Base = 0; + uint64_t NumMembers; + if (isHomogeneousAggregate(Ty, Base, getContext(), &NumMembers) + && NumMembers > 1) { + // Homogeneous aggregates passed in registers will have their elements split + // and stored 16-bytes apart regardless of size (they're notionally in qN, + // qN+1, ...). We reload and store into a temporary local variable + // contiguously. + assert(AI.isDirect() && "Homogeneous aggregates should be passed directly"); + llvm::Type *BaseTy = CGF.ConvertType(QualType(Base, 0)); + llvm::Type *HFATy = llvm::ArrayType::get(BaseTy, NumMembers); + llvm::Value *Tmp = CGF.CreateTempAlloca(HFATy); + + for (unsigned i = 0; i < NumMembers; ++i) { + llvm::Value *BaseOffset = llvm::ConstantInt::get(CGF.Int32Ty, 16 * i); + llvm::Value *LoadAddr = CGF.Builder.CreateGEP(BaseAddr, BaseOffset); + LoadAddr = CGF.Builder.CreateBitCast(LoadAddr, + llvm::PointerType::getUnqual(BaseTy)); + llvm::Value *StoreAddr = CGF.Builder.CreateStructGEP(Tmp, i); + + llvm::Value *Elem = CGF.Builder.CreateLoad(LoadAddr); + CGF.Builder.CreateStore(Elem, StoreAddr); + } + + RegAddr = CGF.Builder.CreateBitCast(Tmp, MemTy); + } else { + // Otherwise the object is contiguous in memory + RegAddr = CGF.Builder.CreateBitCast(BaseAddr, MemTy); + } + + CGF.EmitBranch(ContBlock); + + //======================================= + // Argument was on the stack + //======================================= + CGF.EmitBlock(OnStackBlock); + + llvm::Value *stack_p = 0, *OnStackAddr = 0; + stack_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, "stack_p"); + OnStackAddr = CGF.Builder.CreateLoad(stack_p, "stack"); + + // Again, stack arguments may need realigmnent. In this case both integer and + // floating-point ones might be affected. 
+ if (AI.isDirect() && getContext().getTypeAlign(Ty) > 64) { + int Align = getContext().getTypeAlign(Ty) / 8; + + OnStackAddr = CGF.Builder.CreatePtrToInt(OnStackAddr, CGF.Int64Ty); + + OnStackAddr = CGF.Builder.CreateAdd(OnStackAddr, + llvm::ConstantInt::get(CGF.Int64Ty, Align - 1), + "align_stack"); + OnStackAddr = CGF.Builder.CreateAnd(OnStackAddr, + llvm::ConstantInt::get(CGF.Int64Ty, -Align), + "align_stack"); + + OnStackAddr = CGF.Builder.CreateIntToPtr(OnStackAddr, CGF.Int8PtrTy); + } + + uint64_t StackSize; + if (AI.isDirect()) + StackSize = getContext().getTypeSize(Ty) / 8; + else + StackSize = 8; + + // All stack slots are 8 bytes + StackSize = llvm::RoundUpToAlignment(StackSize, 8); + + llvm::Value *StackSizeC = llvm::ConstantInt::get(CGF.Int32Ty, StackSize); + llvm::Value *NewStack = CGF.Builder.CreateGEP(OnStackAddr, StackSizeC, + "new_stack"); + + // Write the new value of __stack for the next call to va_arg + CGF.Builder.CreateStore(NewStack, stack_p); + + OnStackAddr = CGF.Builder.CreateBitCast(OnStackAddr, MemTy); + + CGF.EmitBranch(ContBlock); + + //======================================= + // Tidy up + //======================================= + CGF.EmitBlock(ContBlock); + + llvm::PHINode *ResAddr = CGF.Builder.CreatePHI(MemTy, 2, "vaarg.addr"); + ResAddr->addIncoming(RegAddr, InRegBlock); + ResAddr->addIncoming(OnStackAddr, OnStackBlock); + + if (AI.isDirect()) + return ResAddr; + + return CGF.Builder.CreateLoad(ResAddr, "vaarg.addr"); +} + +//===----------------------------------------------------------------------===// // NVPTX ABI Implementation //===----------------------------------------------------------------------===// @@ -4397,6 +4812,9 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { case llvm::Triple::mips64el: return *(TheTargetCodeGenInfo = new MIPSTargetCodeGenInfo(Types, false)); + case llvm::Triple::aarch64: + return *(TheTargetCodeGenInfo = new AArch64TargetCodeGenInfo(Types)); + case llvm::Triple::arm: case llvm::Triple::thumb: { diff --git a/lib/Driver/ToolChains.cpp b/lib/Driver/ToolChains.cpp index 6f68f1c577..39f0f06de8 100644 --- a/lib/Driver/ToolChains.cpp +++ b/lib/Driver/ToolChains.cpp @@ -1081,6 +1081,12 @@ Generic_GCC::GCCInstallationDetector::GCCInstallationDetector( // Declare a bunch of static data sets that we'll select between below. These // are specifically designed to always refer to string literals to avoid any // lifetime or initialization issues. 
+ static const char *const AArch64LibDirs[] = { "/lib" }; + static const char *const AArch64Triples[] = { + "aarch64-none-linux-gnu", + "aarch64-linux-gnu" + }; + static const char *const ARMLibDirs[] = { "/lib" }; static const char *const ARMTriples[] = { "arm-linux-gnueabi", @@ -1146,6 +1152,16 @@ Generic_GCC::GCCInstallationDetector::GCCInstallationDetector( }; switch (TargetTriple.getArch()) { + case llvm::Triple::aarch64: + LibDirs.append(AArch64LibDirs, AArch64LibDirs + + llvm::array_lengthof(AArch64LibDirs)); + TripleAliases.append( + AArch64Triples, AArch64Triples + llvm::array_lengthof(AArch64Triples)); + MultiarchLibDirs.append( + AArch64LibDirs, AArch64LibDirs + llvm::array_lengthof(AArch64LibDirs)); + MultiarchTripleAliases.append( + AArch64Triples, AArch64Triples + llvm::array_lengthof(AArch64Triples)); + break; case llvm::Triple::arm: case llvm::Triple::thumb: LibDirs.append(ARMLibDirs, ARMLibDirs + llvm::array_lengthof(ARMLibDirs)); @@ -2218,6 +2234,9 @@ static std::string getMultiarchTriple(const llvm::Triple TargetTriple, if (llvm::sys::fs::exists(SysRoot + "/lib/x86_64-linux-gnu")) return "x86_64-linux-gnu"; return TargetTriple.str(); + case llvm::Triple::aarch64: + if (llvm::sys::fs::exists(SysRoot + "/lib/aarch64-linux-gnu")) + return "aarch64-linux-gnu"; case llvm::Triple::mips: if (llvm::sys::fs::exists(SysRoot + "/lib/mips-linux-gnu")) return "mips-linux-gnu"; @@ -2444,6 +2463,7 @@ void Linux::addClangTargetOptions(const ArgList &DriverArgs, const Generic_GCC::GCCVersion &V = GCCInstallation.getVersion(); bool UseInitArrayDefault = V >= Generic_GCC::GCCVersion::Parse("4.7.0") || + getTriple().getArch() == llvm::Triple::aarch64 || getTriple().getEnvironment() == llvm::Triple::Android; if (DriverArgs.hasFlag(options::OPT_fuse_init_array, options::OPT_fno_use_init_array, @@ -2506,6 +2526,9 @@ void Linux::AddClangSystemIncludeArgs(const ArgList &DriverArgs, "/usr/include/i686-linux-gnu", "/usr/include/i486-linux-gnu" }; + const StringRef AArch64MultiarchIncludeDirs[] = { + "/usr/include/aarch64-linux-gnu" + }; const StringRef ARMMultiarchIncludeDirs[] = { "/usr/include/arm-linux-gnueabi" }; @@ -2529,6 +2552,8 @@ void Linux::AddClangSystemIncludeArgs(const ArgList &DriverArgs, MultiarchIncludeDirs = X86_64MultiarchIncludeDirs; } else if (getTriple().getArch() == llvm::Triple::x86) { MultiarchIncludeDirs = X86MultiarchIncludeDirs; + } else if (getTriple().getArch() == llvm::Triple::aarch64) { + MultiarchIncludeDirs = AArch64MultiarchIncludeDirs; } else if (getTriple().getArch() == llvm::Triple::arm) { if (getTriple().getEnvironment() == llvm::Triple::GNUEABIHF) MultiarchIncludeDirs = ARMHFMultiarchIncludeDirs; diff --git a/lib/Driver/ToolChains.h b/lib/Driver/ToolChains.h index c0a9646c28..b313ab610c 100644 --- a/lib/Driver/ToolChains.h +++ b/lib/Driver/ToolChains.h @@ -388,7 +388,8 @@ public: virtual bool IsIntegratedAssemblerDefault() const { // Default integrated assembler to on for x86. 
- return (getTriple().getArch() == llvm::Triple::x86 || + return (getTriple().getArch() == llvm::Triple::aarch64 || + getTriple().getArch() == llvm::Triple::x86 || getTriple().getArch() == llvm::Triple::x86_64); } }; diff --git a/lib/Driver/Tools.cpp b/lib/Driver/Tools.cpp index f925f762e5..cb7cc2381d 100644 --- a/lib/Driver/Tools.cpp +++ b/lib/Driver/Tools.cpp @@ -549,6 +549,7 @@ static bool isSignedCharDefault(const llvm::Triple &Triple) { default: return true; + case llvm::Triple::aarch64: case llvm::Triple::arm: case llvm::Triple::ppc: case llvm::Triple::ppc64: @@ -5578,6 +5579,8 @@ void linuxtools::Link::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-m"); if (ToolChain.getArch() == llvm::Triple::x86) CmdArgs.push_back("elf_i386"); + else if (ToolChain.getArch() == llvm::Triple::aarch64) + CmdArgs.push_back("aarch64linux"); else if (ToolChain.getArch() == llvm::Triple::arm || ToolChain.getArch() == llvm::Triple::thumb) CmdArgs.push_back("armelf_linux_eabi"); @@ -5626,6 +5629,8 @@ void linuxtools::Link::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("/system/bin/linker"); else if (ToolChain.getArch() == llvm::Triple::x86) CmdArgs.push_back("/lib/ld-linux.so.2"); + else if (ToolChain.getArch() == llvm::Triple::aarch64) + CmdArgs.push_back("/lib/ld-linux-aarch64.so.1"); else if (ToolChain.getArch() == llvm::Triple::arm || ToolChain.getArch() == llvm::Triple::thumb) { if (ToolChain.getTriple().getEnvironment() == llvm::Triple::GNUEABIHF) diff --git a/test/CodeGen/aarch64-arguments.c b/test/CodeGen/aarch64-arguments.c new file mode 100644 index 0000000000..2255b8244d --- /dev/null +++ b/test/CodeGen/aarch64-arguments.c @@ -0,0 +1,194 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -w -o - %s | FileCheck -check-prefix=PCS %s + +// Sign extension is performed by the callee on AArch64, which means +// that we *shouldn't* tag arguments and returns with their extension. 
+ +// PCS: define i8 @f0(i16 %a) +char f0(short a) { + return a; +} + +// PCS: define [1 x i64] @f1() +struct s1 { char f0; }; +struct s1 f1(void) {} + +// PCS: define [1 x i64] @f2() +struct s2 { short f0; }; +struct s2 f2(void) {} + +// PCS: define [1 x i64] @f3() +struct s3 { int f0; }; +struct s3 f3(void) {} + +// PCS: define [1 x i64] @f4() +struct s4 { struct s4_0 { int f0; } f0; }; +struct s4 f4(void) {} + +// PCS: define [1 x i64] @f5() +struct s5 { struct { } f0; int f1; }; +struct s5 f5(void) {} + +// PCS: define [1 x i64] @f6() +struct s6 { int f0[1]; }; +struct s6 f6(void) {} + +// PCS: define void @f7() +struct s7 { struct { int : 0; } f0; }; +struct s7 f7(void) {} + +// PCS: define void @f8() +struct s8 { struct { int : 0; } f0[1]; }; +struct s8 f8(void) {} + +// PCS: define [1 x i64] @f9() +struct s9 { long f0; int : 0; }; +struct s9 f9(void) {} + +// PCS: define [1 x i64] @f10() +struct s10 { long f0; int : 0; int : 0; }; +struct s10 f10(void) {} + +// PCS: define [1 x i64] @f11() +struct s11 { int : 0; long f0; }; +struct s11 f11(void) {} + +// PCS: define [1 x i64] @f12() +union u12 { char f0; short f1; int f2; long f3; }; +union u12 f12(void) {} + +// PCS: define %struct.s13 @f13() +struct s13 { float f0; }; +struct s13 f13(void) {} + +// PCS: define %union.u14 @f14() +union u14 { float f0; }; +union u14 f14(void) {} + +// PCS: define void @f15() +void f15(struct s7 a0) {} + +// PCS: define void @f16() +void f16(struct s8 a0) {} + +// PCS: define [1 x i64] @f17() +struct s17 { short f0 : 13; char f1 : 4; }; +struct s17 f17(void) {} + +// PCS: define [1 x i64] @f18() +struct s18 { short f0; char f1 : 4; }; +struct s18 f18(void) {} + +// PCS: define [1 x i64] @f19() +struct s19 { long f0; struct s8 f1; }; +struct s19 f19(void) {} + +// PCS: define [1 x i64] @f20() +struct s20 { struct s8 f1; long f0; }; +struct s20 f20(void) {} + +// PCS: define [1 x i64] @f21() +struct s21 { struct {} f1; long f0 : 4; }; +struct s21 f21(void) {} + +// PCS: define { float, float } @f22() +// PCS: define { double, double } @f23( +_Complex float f22(void) {} +_Complex double f23(void) {} + +// PCS: define [1 x i64] @f24() +struct s24 { _Complex char f0; }; +struct s24 f24() {} + +// PCS: define [1 x i64] @f25() +struct s25 { _Complex short f0; }; +struct s25 f25() {} + +// PCS: define [1 x i64] @f26() +struct s26 { _Complex int f0; }; +struct s26 f26() {} + +// PCS: define [2 x i64] @f27() +struct s27 { _Complex long f0; }; +struct s27 f27() {} + +// PCS: define void @f28(i8 %a, i16 %b, i32 %c, i64 %d, float %e, double %f) +void f28(char a, short b, int c, long d, float e, double f) {} + +// PCS: define void @f29([2 x i64] %a +struct s29 { int arr[4]; }; +void f29(struct s29 a) {} + +// PCS: define void @f30(%struct.s30* %a) +struct s30 { int arr[4]; char c;}; +void f30(struct s30 a) {} + +// PCS: define void @f31([4 x double] %a +struct s31 { double arr[4]; }; +void f31(struct s31 a) {} + +// PCS: define void @f32(%struct.s32* %a) +struct s32 { float arr[5]; }; +void f32(struct s32 a) {} + +// Not the only solution, but it *is* an HFA. 
+// PCS: define void @f33([3 x float] %a.coerce0, float %a.coerce1) +struct s33 { float arr[3]; float a; }; +void f33(struct s33 a) {} + +// PCS: define void @f34(%struct.s34* sret noalias +struct s34 { int a[4]; char b }; +struct s34 f34(void) {} + +// PCS: define void @f35() +struct s35 {}; +void f35(struct s35 a) {} + +// Check padding is added: +// PCS: @f36(i32 %x0, i32 %x1, i32 %x2, i32 %x3, i32 %x4, i32 %x5, i32 %x6, [1 x i64], %struct.s36* byval align 8 %stacked) +struct s36 { long a, b; }; +void f36(int x0, int x1, int x2, int x3, int x4, int x5, int x6, struct s36 stacked) {} + +// But only once: +// PCS: @f37(i32 %x0, i32 %x1, i32 %x2, i32 %x3, i32 %x4, i32 %x5, i32 %x6, [1 x i64], %struct.s37* byval align 8 %stacked, %struct.s37* byval align 8 %stacked2) +struct s37 { long a, b; }; +void f37(int x0, int x1, int x2, int x3, int x4, int x5, int x6, struct s37 stacked, struct s37 stacked2) {} + +// Check for HFA padding args. Also, they should not end up on the stack in a +// way which will have holes in when lowered further by LLVM. In particular [3 x +// float] would be unacceptable. + +// PCS: @f38(float %s0, double %d1, float %s2, float %s3, float %s4, float %s5, [2 x float], %struct.s38* byval align 4 %stacked) +struct s38 { float a, b, c; }; +void f38(float s0, double d1, float s2, float s3, float s4, float s5, struct s38 stacked) {} + +// Check both VFP and integer arguments are padded (also that pointers and enums +// get counted as integer types correctly). +struct s39_int { long a, b; }; +struct s39_float { float a, b, c, d; }; +enum s39_enum { Val1, Val2 }; +// PCS: @f39(float %s0, i32 %x0, float %s1, i32* %x1, float %s2, i32 %x2, float %s3, float %s4, i32 %x3, [3 x float], %struct.s39_float* byval align 4 %stacked, i32 %x4, i32 %x5, i32 %x6, [1 x i64], %struct.s39_int* byval align 8 %stacked2) +void f39(float s0, int x0, float s1, int *x1, float s2, enum s39_enum x2, float s3, float s4, + int x3, struct s39_float stacked, int x4, int x5, int x6, + struct s39_int stacked2) {} + +struct s40 { __int128 a; }; +// PCS: @f40(i32 %x0, [1 x i128] %x2_3.coerce, i32 %x4, i32 %x5, i32 %x6, [1 x i64], %struct.s40* byval align 16 %stacked) +void f40(int x0, struct s40 x2_3, int x4, int x5, int x6, struct s40 stacked) {} + +// Checking: __int128 will get properly aligned type, with padding so big struct doesn't use x7. +struct s41 { int arr[5]; }; +// PCS: @f41(i32 %x0, i32 %x1, i32 %x2, i32 %x3, i32 %x4, i32 %x5, i32 %x6, [1 x i64], i128* byval align 16, %struct.s41* %stacked2) +int f41(int x0, int x1, int x2, int x3, int x4, int x5, int x6, __int128 stacked, struct s41 stacked2) {} + +// Checking: __int128 needing to be aligned in registers will consume correct +// number. Previously padding was inserted before "stacked" because x6_7 was +// "allocated" to x5 and x6 by clang. 
+// PCS: @f42(i32 %x0, i32 %x1, i32 %x2, i32 %x3, i32 %x4, i128 %x6_7, i128* byval align 16) +void f42(int x0, int x1, int x2, int x3, int x4, __int128 x6_7, __int128 stacked) {} + +// Checking: __fp16 is extended to double when calling variadic functions +void variadic(int a, ...); +void f43(__fp16 *in) { + variadic(42, *in); +// CHECK: call void @variadic(i32 42, double +} diff --git a/test/CodeGen/aarch64-inline-asm.c b/test/CodeGen/aarch64-inline-asm.c new file mode 100644 index 0000000000..ca39c6e7ff --- /dev/null +++ b/test/CodeGen/aarch64-inline-asm.c @@ -0,0 +1,56 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -o - %s | FileCheck %s + +// The only part clang really deals with is the lvalue/rvalue +// distinction on constraints. It's sufficient to emit llvm and make +// sure that's sane. + +long var; + +void test_generic_constraints(int var32, long var64) { + asm("add %0, %1, %1" : "=r"(var32) : "0"(var32)); +// CHECK: [[R32_ARG:%[a-zA-Z0-9]+]] = load i32* +// CHECK: call i32 asm "add $0, $1, $1", "=r,0"(i32 [[R32_ARG]]) + + asm("add %0, %1, %1" : "=r"(var64) : "0"(var64)); +// CHECK: [[R32_ARG:%[a-zA-Z0-9]+]] = load i64* +// CHECK: call i64 asm "add $0, $1, $1", "=r,0"(i64 [[R32_ARG]]) + + asm("ldr %0, %1" : "=r"(var32) : "m"(var)); + asm("ldr %0, [%1]" : "=r"(var64) : "r"(&var)); +// CHECK: call i32 asm "ldr $0, $1", "=r,*m"(i64* @var) +// CHECK: call i64 asm "ldr $0, [$1]", "=r,r"(i64* @var) +} + +float f; +double d; +void test_constraint_w() { + asm("fadd %s0, %s1, %s1" : "=w"(f) : "w"(f)); +// CHECK: [[FLT_ARG:%[a-zA-Z_0-9]+]] = load float* @f +// CHECK: call float asm "fadd ${0:s}, ${1:s}, ${1:s}", "=w,w"(float [[FLT_ARG]]) + + asm("fadd %d0, %d1, %d1" : "=w"(d) : "w"(d)); +// CHECK: [[DBL_ARG:%[a-zA-Z_0-9]+]] = load double* @d +// CHECK: call double asm "fadd ${0:d}, ${1:d}, ${1:d}", "=w,w"(double [[DBL_ARG]]) +} + +void test_constraints_immed(void) { + asm("add x0, x0, %0" : : "I"(4095) : "x0"); + asm("and w0, w0, %0" : : "K"(0xaaaaaaaa) : "w0"); + asm("and x0, x0, %0" : : "L"(0xaaaaaaaaaaaaaaaa) : "x0"); +// CHECK: call void asm sideeffect "add x0, x0, $0", "I,~{x0}"(i32 4095) +// CHECK: call void asm sideeffect "and w0, w0, $0", "K,~{w0}"(i32 -1431655766) +// CHECK: call void asm sideeffect "and x0, x0, $0", "L,~{x0}"(i64 -6148914691236517206) +} + +void test_constraint_S(void) { + int *addr; + asm("adrp %0, %A1\n\t" + "add %0, %0, %L1" : "=r"(addr) : "S"(&var)); +// CHECK: call i32* asm "adrp $0, ${1:A}\0A\09add $0, $0, ${1:L}", "=r,S"(i64* @var) +} + +void test_constraint_Q(void) { + int val; + asm("ldxr %0, %1" : "=r"(val) : "Q"(var)); +// CHECK: call i32 asm "ldxr $0, $1", "=r,*Q"(i64* @var) +} diff --git a/test/CodeGen/aarch64-type-sizes.c b/test/CodeGen/aarch64-type-sizes.c new file mode 100644 index 0000000000..3b9c9fc426 --- /dev/null +++ b/test/CodeGen/aarch64-type-sizes.c @@ -0,0 +1,90 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -w -o - %s | FileCheck %s + +// char by definition has size 1 + +int check_short() { + return sizeof(short); +// CHECK: ret i32 2 +} + +int check_int() { + return sizeof(int); +// CHECK: ret i32 4 +} + +int check_long() { +// Both 4 and 8 are permitted under the PCS, Linux says 8! 
+ return sizeof(long); +// CHECK: ret i32 8 +} + +int check_longlong() { + return sizeof(long long); +// CHECK: ret i32 8 +} + +int check_int128() { + return sizeof(__int128); +// CHECK: ret i32 16 +} + +int check_fp16() { + return sizeof(__fp16); +// CHECK: ret i32 2 +} + +int check_float() { + return sizeof(float); +// CHECK: ret i32 4 +} + +int check_double() { + return sizeof(double); +// CHECK: ret i32 8 +} + +int check_longdouble() { + return sizeof(long double); +// CHECK: ret i32 16 +} + +int check_floatComplex() { + return sizeof(float _Complex); +// CHECK: ret i32 8 +} + +int check_doubleComplex() { + return sizeof(double _Complex); +// CHECK: ret i32 16 +} + +int check_longdoubleComplex() { + return sizeof(long double _Complex); +// CHECK: ret i32 32 +} + +int check_bool() { + return sizeof(_Bool); +// CHECK: ret i32 1 +} + +int check_wchar() { +// PCS allows either unsigned short or unsigned int. Linux again says "bigger!" + return sizeof(__WCHAR_TYPE__); +// CHECK: ret i32 4 +} + +int check_wchar_unsigned() { + return (__WCHAR_TYPE__)-1 > (__WCHAR_TYPE__)0; +// CHECK: ret i32 1 +} + +enum Small { + Item +}; + +int foo() { + return sizeof(enum Small); +// CHECK: ret i32 4 +} + diff --git a/test/CodeGen/aarch64-varargs.c b/test/CodeGen/aarch64-varargs.c new file mode 100644 index 0000000000..324a070827 --- /dev/null +++ b/test/CodeGen/aarch64-varargs.c @@ -0,0 +1,238 @@ +// RUN: %clang_cc1 -triple aarch64 -emit-llvm -o - %s | FileCheck %s +#include <stdarg.h> + +// Obviously there's more than one way to implement va_arg. This test should at +// least prevent unintentional regressions caused by refactoring. + +va_list the_list; + +int simple_int(void) { +// CHECK: define i32 @simple_int + return va_arg(the_list, int); +// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0 +// CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]] + +// CHECK: [[VAARG_MAYBE_REG]] +// CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 8 +// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0 +// CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]] + +// CHECK: [[VAARG_IN_REG]] +// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 1) +// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr i8* [[REG_TOP]], i32 [[GR_OFFS]] +// CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to i32* +// CHECK: br label %[[VAARG_END:[a-z._0-9]+]] + +// CHECK: [[VAARG_ON_STACK]] +// CHECK: [[STACK:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr i8* [[STACK]], i32 8 +// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to i32* +// CHECK: br label %[[VAARG_END]] + +// CHECK: [[VAARG_END]] +// CHECK: [[ADDR:%[a-z._0-9]+]] = phi i32* [ [[FROMREG_ADDR]], %[[VAARG_IN_REG]] ], [ [[FROMSTACK_ADDR]], %[[VAARG_ON_STACK]] ] +// CHECK: [[RESULT:%[a-z_0-9]+]] = load i32* [[ADDR]] +// CHECK: ret i32 [[RESULT]] +} + +__int128 aligned_int(void) { +// CHECK: define i128 @aligned_int + return va_arg(the_list, 
__int128);
+// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0
+// CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]]
+
+// CHECK: [[VAARG_MAYBE_REG]]
+// CHECK: [[ALIGN_REGOFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 15
+// CHECK: [[ALIGNED_REGOFFS:%[a-z_0-9]+]] = and i32 [[ALIGN_REGOFFS]], -16
+// CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[ALIGNED_REGOFFS]], 16
+// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0
+// CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]]
+
+// CHECK: [[VAARG_IN_REG]]
+// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 1)
+// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr i8* [[REG_TOP]], i32 [[ALIGNED_REGOFFS]]
+// CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to i128*
+// CHECK: br label %[[VAARG_END:[a-z._0-9]+]]
+
+// CHECK: [[VAARG_ON_STACK]]
+// CHECK: [[STACK:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[STACKINT:%[a-z_0-9]+]] = ptrtoint i8* [[STACK]] to i64
+// CHECK: [[ALIGN_STACK:%[a-z_0-9]+]] = add i64 [[STACKINT]], 15
+// CHECK: [[ALIGNED_STACK_INT:%[a-z_0-9]+]] = and i64 [[ALIGN_STACK]], -16
+// CHECK: [[ALIGNED_STACK_PTR:%[a-z_0-9]+]] = inttoptr i64 [[ALIGNED_STACK_INT]] to i8*
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr i8* [[ALIGNED_STACK_PTR]], i32 16
+// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[ALIGNED_STACK_PTR]] to i128*
+// CHECK: br label %[[VAARG_END]]
+
+// CHECK: [[VAARG_END]]
+// CHECK: [[ADDR:%[a-z._0-9]+]] = phi i128* [ [[FROMREG_ADDR]], %[[VAARG_IN_REG]] ], [ [[FROMSTACK_ADDR]], %[[VAARG_ON_STACK]] ]
+// CHECK: [[RESULT:%[a-z_0-9]+]] = load i128* [[ADDR]]
+// CHECK: ret i128 [[RESULT]]
+}
+
+struct bigstruct {
+  int a[10];
+};
+
+struct bigstruct simple_indirect(void) {
+// CHECK: define void @simple_indirect
+  return va_arg(the_list, struct bigstruct);
+// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0
+// CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]]
+
+// CHECK: [[VAARG_MAYBE_REG]]
+// CHECK-NOT: and i32
+// CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 8
+// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0
+// CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]]
+
+// CHECK: [[VAARG_IN_REG]]
+// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 1)
+// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr i8* [[REG_TOP]], i32 [[GR_OFFS]]
+// CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to %struct.bigstruct**
+// CHECK: br label %[[VAARG_END:[a-z._0-9]+]]
+
+// CHECK: [[VAARG_ON_STACK]]
+// CHECK: [[STACK:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK-NOT: and i64
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr i8* [[STACK]], i32 8
+// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to %struct.bigstruct**
+// CHECK: br label %[[VAARG_END]]
+
+// CHECK: [[VAARG_END]]
+// CHECK: [[ADDR:%[a-z._0-9]+]] = phi %struct.bigstruct** [ [[FROMREG_ADDR]], %[[VAARG_IN_REG]] ], [ [[FROMSTACK_ADDR]], %[[VAARG_ON_STACK]] ]
+// CHECK: load %struct.bigstruct** [[ADDR]]
+}
+
+struct aligned_bigstruct {
+  float a;
+  long double b;
+};
+
+struct aligned_bigstruct simple_aligned_indirect(void) {
+// CHECK: define void @simple_aligned_indirect
+  return va_arg(the_list, struct aligned_bigstruct);
+// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0
+// CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]]
+
+// CHECK: [[VAARG_MAYBE_REG]]
+// CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 8
+// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0
+// CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]]
+
+// CHECK: [[VAARG_IN_REG]]
+// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 1)
+// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr i8* [[REG_TOP]], i32 [[GR_OFFS]]
+// CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to %struct.aligned_bigstruct**
+// CHECK: br label %[[VAARG_END:[a-z._0-9]+]]
+
+// CHECK: [[VAARG_ON_STACK]]
+// CHECK: [[STACK:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr i8* [[STACK]], i32 8
+// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to %struct.aligned_bigstruct**
+// CHECK: br label %[[VAARG_END]]
+
+// CHECK: [[VAARG_END]]
+// CHECK: [[ADDR:%[a-z._0-9]+]] = phi %struct.aligned_bigstruct** [ [[FROMREG_ADDR]], %[[VAARG_IN_REG]] ], [ [[FROMSTACK_ADDR]], %[[VAARG_ON_STACK]] ]
+// CHECK: load %struct.aligned_bigstruct** [[ADDR]]
+}
+
+double simple_double(void) {
+// CHECK: define double @simple_double
+  return va_arg(the_list, double);
+// CHECK: [[VR_OFFS:%[a-z_0-9]+]] = load i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 4)
+// CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[VR_OFFS]], 0
+// CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK]], label %[[VAARG_MAYBE_REG]]
+
+// CHECK: [[VAARG_MAYBE_REG]]
+// CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[VR_OFFS]], 16
+// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 4)
+// CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0
+// CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]]
+
+// CHECK: [[VAARG_IN_REG]]
+// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 2)
+// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr i8* [[REG_TOP]], i32 [[VR_OFFS]]
+// CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to double*
+// CHECK: br label %[[VAARG_END]]
+
+// CHECK: [[VAARG_ON_STACK]]
+// CHECK: [[STACK:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr i8* [[STACK]], i32 8
+// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to double*
+// CHECK: br label %[[VAARG_END]]
+
+// CHECK: [[VAARG_END]]
+// CHECK: [[ADDR:%[a-z._0-9]+]] = phi double* [ [[FROMREG_ADDR]], %[[VAARG_IN_REG]] ], [ [[FROMSTACK_ADDR]], %[[VAARG_ON_STACK]] ]
+// CHECK: [[RESULT:%[a-z_0-9]+]] = load double* [[ADDR]]
+// CHECK: ret double [[RESULT]]
+}
+
+struct hfa {
+  float a, b;
+};
+
+struct hfa simple_hfa(void) {
+// CHECK: define %struct.hfa @simple_hfa
+  return va_arg(the_list, struct hfa);
+// CHECK: [[VR_OFFS:%[a-z_0-9]+]] = load i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 4)
+// CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[VR_OFFS]], 0
+// CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]]
+
+// CHECK: [[VAARG_MAYBE_REG]]
+// CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[VR_OFFS]], 32
+// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 4)
+// CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0
+// CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]]
+
+// CHECK: [[VAARG_IN_REG]]
+// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 2)
+// CHECK: [[FIRST_REG:%[a-z_0-9]+]] = getelementptr i8* [[REG_TOP]], i32 [[VR_OFFS]]
+// CHECK: [[EL_ADDR:%[a-z_0-9]+]] = getelementptr i8* [[FIRST_REG]], i32 0
+// CHECK: [[EL_TYPED:%[a-z_0-9]+]] = bitcast i8* [[EL_ADDR]] to float*
+// CHECK: [[EL_TMPADDR:%[a-z_0-9]+]] = getelementptr inbounds [2 x float]* %[[TMP_HFA:[a-z_.0-9]+]], i32 0, i32 0
+// CHECK: [[EL:%[a-z_0-9]+]] = load float* [[EL_TYPED]]
+// CHECK: store float [[EL]], float* [[EL_TMPADDR]]
+// CHECK: [[EL_ADDR:%[a-z_0-9]+]] = getelementptr i8* [[FIRST_REG]], i32 16
+// CHECK: [[EL_TYPED:%[a-z_0-9]+]] = bitcast i8* [[EL_ADDR]] to float*
+// CHECK: [[EL_TMPADDR:%[a-z_0-9]+]] = getelementptr inbounds [2 x float]* %[[TMP_HFA]], i32 0, i32 1
+// CHECK: [[EL:%[a-z_0-9]+]] = load float* [[EL_TYPED]]
+// CHECK: store float [[EL]], float* [[EL_TMPADDR]]
+// CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast [2 x float]* %[[TMP_HFA]] to %struct.hfa*
+// CHECK: br label %[[VAARG_END:[a-z_.0-9]+]]
+
+// CHECK: [[VAARG_ON_STACK]]
+// CHECK: [[STACK:%[a-z_0-9]+]] = load i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr i8* [[STACK]], i32 8
+// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to %struct.hfa*
+// CHECK: br label %[[VAARG_END]]
+
+// CHECK: [[VAARG_END]]
+// CHECK: [[ADDR:%[a-z._0-9]+]] = phi %struct.hfa* [ [[FROMREG_ADDR]], %[[VAARG_IN_REG]] ], [ [[FROMSTACK_ADDR]], %[[VAARG_ON_STACK]] ]
+}
+
+void check_start(int n, ...) {
+// CHECK: define void @check_start(i32 %n, ...)
+
+  va_list the_list;
+  va_start(the_list, n);
+// CHECK: [[THE_LIST:%[a-z_0-9]+]] = alloca %struct.__va_list
+// CHECK: [[VOIDP_THE_LIST:%[a-z_0-9]+]] = bitcast %struct.__va_list* [[THE_LIST]] to i8*
+// CHECK: call void @llvm.va_start(i8* [[VOIDP_THE_LIST]])
+}
+
+
diff --git a/test/CodeGenCXX/aarch64-arguments.cpp b/test/CodeGenCXX/aarch64-arguments.cpp
new file mode 100644
index 0000000000..f56ad0bbdc
--- /dev/null
+++ b/test/CodeGenCXX/aarch64-arguments.cpp
@@ -0,0 +1,5 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux -emit-llvm -w -o - %s | FileCheck -check-prefix=PCS %s
+
+// PCS: define void @{{.*}}(i8 %a
+struct s0 {};
+void f0(s0 a) {}
diff --git a/test/CodeGenCXX/aarch64-cxxabi.cpp b/test/CodeGenCXX/aarch64-cxxabi.cpp
new file mode 100644
index 0000000000..04d9493ae6
--- /dev/null
+++ b/test/CodeGenCXX/aarch64-cxxabi.cpp
@@ -0,0 +1,96 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -w -o - %s | FileCheck %s
+
+// Check differences between the generic Itanium ABI, the AArch32 version and
+// the AArch64 version.
+
+////////////////////////////////////////////////////////////////////////////////
+
+// The ABI says that the key function is the "textually first, non-inline,
+// non-pure, virtual member function". The generic version decides this after
+// the completion of the class definition; the AArch32 version decides this at
+// the end of the translation unit.
+
+// We construct a class which needs a VTable here under generic ABI, but not
+// AArch32.
+
+// (see next section for explanation of guard)
+// CHECK: @_ZGVZ15guard_variablesiE4mine = internal global i64 0
+
+// CHECK: @_ZTV16CheckKeyFunction =
+struct CheckKeyFunction {
+  virtual void foo();
+};
+
+// This is not inline when CheckKeyFunction is completed, so
+// CheckKeyFunction::foo is the key function. VTables should be emitted.
+inline void CheckKeyFunction::foo() {
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Guard variables only specify and use the low bit to determine status, rather
+// than the low byte as in the generic Itanium ABI. However, unlike 32-bit ARM,
+// they *are* 64-bits wide so check that in case confusion has occurred.
+
+class Guarded {
+public:
+  Guarded(int i);
+  ~Guarded();
+};
+
+void guard_variables(int a) {
+  static Guarded mine(a);
+// CHECK: [[GUARDBIT:%[0-9]+]] = and i64 {{%[0-9]+}}, 1
+// CHECK: icmp eq i64 [[GUARDBIT]], 0
+
+  // As guards are 64-bit, these helpers should take 64-bit pointers.
+// CHECK: call i32 @__cxa_guard_acquire(i64*
+// CHECK: call void @__cxa_guard_release(i64*
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+// Member function pointers use the adj field to distinguish between virtual and
+// nonvirtual members. As a result the adjustment is shifted (if ptr was used, a
+// mask would be expected instead).
+
+class C {
+  int a();
+  virtual int b();
+};
+
+
+int member_pointer(C &c, int (C::*func)()) {
+// CHECK: ashr i64 %[[MEMPTRADJ:[0-9a-z.]+]], 1
+// CHECK: %[[ISVIRTUAL:[0-9]+]] = and i64 %[[MEMPTRADJ]], 1
+// CHECK: icmp ne i64 %[[ISVIRTUAL]], 0
+  return (c.*func)();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+// AArch64 PCS says that va_list type is based on "struct __va_list ..." in the
+// std namespace, which means it should mangle as "St9__va_list".
+
+// CHECK: @_Z7va_funcSt9__va_list
+void va_func(__builtin_va_list l) {
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+// AArch64 constructors (like generic Itanium, but unlike AArch32) do not return
+// "this".
+
+void test_constructor() {
+  Guarded g(42);
+// CHECK: call void @_ZN7GuardedC1Ei
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+// In principle the AArch32 ABI allows this to be accomplished via a call to
+// __aeabi_atexit instead of __cxa_atexit. Clang doesn't make use of this at the
+// moment, but it's definitely not allowed for AArch64.
+
+// CHECK: call i32 @__cxa_atexit
+Guarded g(42);
diff --git a/test/Driver/aarch64-features.c b/test/Driver/aarch64-features.c
new file mode 100644
index 0000000000..2acb7157f6
--- /dev/null
+++ b/test/Driver/aarch64-features.c
@@ -0,0 +1,5 @@
+// RUN: %clang -target aarch64-none-linux-gnu -### %s -fsyntax-only 2>&1 | FileCheck %s
+
+// The AArch64 PCS states that chars should be unsigned.
+// CHECK: fno-signed-char
+
diff --git a/test/Preprocessor/aarch64-target-features.c b/test/Preprocessor/aarch64-target-features.c
new file mode 100644
index 0000000000..65104e3311
--- /dev/null
+++ b/test/Preprocessor/aarch64-target-features.c
@@ -0,0 +1,30 @@
+// RUN: %clang -target aarch64-none-linux-gnu -x c -E -dM %s -o - | FileCheck %s
+// CHECK: __AARCH 8
+// CHECK: __AARCH64EL__
+// CHECK: __AARCH_ACLE 101
+// CHECK-NOT: __AARCH_ADVSIMD_FP
+// CHECK-NOT: __AARCH_FEATURE_ADVSIMD
+// CHECK-NOT: __AARCH_FEATURE_BIG_ENDIAN
+// CHECK: __AARCH_FEATURE_CLZ 1
+// CHECK: __AARCH_FEATURE_FMA 1
+// CHECK: __AARCH_FEATURE_LDREX 0xf
+// CHECK: __AARCH_FEATURE_UNALIGNED 1
+// CHECK: __AARCH_FP 0xe
+// CHECK-NOT: __AARCH_FP_FAST
+// CHECK: __AARCH_FP16_FORMAT_IEEE 1
+// CHECK: __AARCH_FP_FENV_ROUNDING 1
+// CHECK: __AARCH_PROFILE 'A'
+// CHECK: __AARCH_SIZEOF_MINIMAL_ENUM 4
+// CHECK: __AARCH_SIZEOF_WCHAR_T 4
+// CHECK: __aarch64__
+
+
+// RUN: %clang -target aarch64-none-linux-gnu -ffast-math -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-FASTMATH %s
+// CHECK-FASTMATH: __AARCH_FP_FAST
+
+// RUN: %clang -target aarch64-none-linux-gnu -fshort-wchar -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SHORTWCHAR %s
+// CHECK-SHORTWCHAR: __AARCH_SIZEOF_WCHAR_T 2
+
+// RUN: %clang -target aarch64-none-linux-gnu -fshort-enums -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SHORTENUMS %s
+// CHECK-SHORTENUMS: __AARCH_SIZEOF_MINIMAL_ENUM 1
+
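The control flow that the aarch64-varargs.c CHECK lines above pin down follows the va_list layout in the AAPCS64 document (IHI0055). A small stand-alone C model of that walk is sketched below; the struct and function names (aapcs64_va_list, take_long) are invented here purely for illustration and are not part of the commit or of Clang itself.

#include <stdio.h>
#include <string.h>

/* Model of the AAPCS64 __va_list record: the ABI's __stack, __gr_top,
 * __vr_top, __gr_offs and __vr_offs fields, written here with plain names.
 * "the_list" in the tests above is an instance of the corresponding
 * %struct.__va_list. */
struct aapcs64_va_list {
  void *stack;    /* next stacked argument                    */
  void *gr_top;   /* one past the general-register save area  */
  void *vr_top;   /* one past the FP/SIMD-register save area  */
  int gr_offs;    /* negative while x-register slots remain   */
  int vr_offs;    /* negative while v-register slots remain   */
};

/* Mirrors the maybe_reg / in_reg / on_stack branches that the CHECK lines
 * verify for an 8-byte integer argument. */
static long take_long(struct aapcs64_va_list *ap) {
  long value;
  if (ap->gr_offs < 0) {            /* negative: register slots may remain     */
    int offs = ap->gr_offs;
    ap->gr_offs += 8;               /* claim one 8-byte register slot          */
    if (ap->gr_offs <= 0) {         /* still <= 0: the value was in a register */
      memcpy(&value, (char *)ap->gr_top + offs, sizeof value);
      return value;
    }
  }
  /* Register area exhausted: read an 8-byte slot from the incoming stack. */
  memcpy(&value, ap->stack, sizeof value);
  ap->stack = (char *)ap->stack + 8;
  return value;
}

int main(void) {
  long reg_area[1] = { 42 };    /* pretend register save area with one slot */
  long stack_area[1] = { 99 };  /* pretend stacked-argument area            */
  struct aapcs64_va_list ap = { stack_area, &reg_area[1], 0, -8, 0 };
  long first = take_long(&ap);  /* comes from the register save area */
  long second = take_long(&ap); /* register area now exhausted: stack */
  printf("%ld %ld\n", first, second);   /* prints "42 99" */
  return 0;
}

Floating-point arguments follow the same shape through vr_top/vr_offs, except that each register slot is 16 bytes, which is why the double and HFA tests above advance the offset by 16 per register and read the second HFA element 16 bytes after the first.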
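The member-function-pointer difference exercised by aarch64-cxxabi.cpp can be summarised the same way: the virtual/non-virtual flag moves from the low bit of the pointer field (generic Itanium) to the low bit of the adjustment field (ARM and AArch64), so the real this-adjustment has to be recovered with an arithmetic shift. The stand-alone sketch below uses invented names and is only a model of the two encodings, not Clang source.

#include <stdint.h>

/* Two encodings of a pointer-to-member-function; illustrative only. */
struct member_fn_ptr {
  uint64_t ptr;   /* function address, or vtable offset for virtual functions */
  uint64_t adj;   /* adjustment applied to "this" before the call             */
};

/* Generic Itanium: the low bit of 'ptr' flags a virtual function and 'adj'
 * is the adjustment itself. */
static int     is_virtual_generic(struct member_fn_ptr p)  { return (int)(p.ptr & 1); }
static int64_t this_adjust_generic(struct member_fn_ptr p) { return (int64_t)p.adj; }

/* ARM/AArch64 variant: the flag lives in the low bit of 'adj', so the real
 * adjustment is recovered with an arithmetic shift -- the "ashr i64 ..., 1"
 * and "and i64 ..., 1" that the CHECK lines in member_pointer look for. */
static int     is_virtual_aarch64(struct member_fn_ptr p)  { return (int)(p.adj & 1); }
static int64_t this_adjust_aarch64(struct member_fn_ptr p) { return (int64_t)p.adj >> 1; }

int main(void) {
  /* Encode "virtual function, adjust this by 16 bytes" in both schemes. */
  struct member_fn_ptr generic = { 1, 16 };            /* ptr bit 0 set     */
  struct member_fn_ptr arm64   = { 0, (16 << 1) | 1 }; /* adj = (16 << 1)|1 */
  return !(is_virtual_generic(generic) && this_adjust_generic(generic) == 16 &&
           is_virtual_aarch64(arm64)   && this_adjust_aarch64(arm64)   == 16);
}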