diff options
author | Reid Spencer <rspencer@reidspencer.com> | 2007-07-11 17:01:13 +0000 |
---|---|---|
committer | Reid Spencer <rspencer@reidspencer.com> | 2007-07-11 17:01:13 +0000 |
commit | 5f016e2cb5d11daeb237544de1c5d59f20fe1a6e (patch) | |
tree | 8b6bfcb8783d16827f896d5facbd4549300e8a1e | |
parent | a5f182095bf2065ca94f1c86957ee91f9068964b (diff) |
Stage two of getting CFE top correct.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@39734 91177308-0d34-0410-b5e6-96231b3b80d8
230 files changed, 34786 insertions, 5 deletions
diff --git a/AST/ASTContext.cpp b/AST/ASTContext.cpp new file mode 100644 index 0000000000..b1c20c98e5 --- /dev/null +++ b/AST/ASTContext.cpp @@ -0,0 +1,531 @@ +//===--- ASTContext.cpp - Context to hold long-lived AST nodes ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ASTContext interface. +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/ASTContext.h" +#include "clang/AST/Decl.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/Basic/TargetInfo.h" +#include "llvm/ADT/SmallVector.h" +using namespace clang; + +enum FloatingRank { + FloatRank, DoubleRank, LongDoubleRank +}; + +ASTContext::~ASTContext() { + // Deallocate all the types. + while (!Types.empty()) { + if (FunctionTypeProto *FT = dyn_cast<FunctionTypeProto>(Types.back())) { + // Destroy the object, but don't call delete. These are malloc'd. 
+ FT->~FunctionTypeProto(); + free(FT); + } else { + delete Types.back(); + } + Types.pop_back(); + } +} + +void ASTContext::PrintStats() const { + fprintf(stderr, "*** AST Context Stats:\n"); + fprintf(stderr, " %d types total.\n", (int)Types.size()); + unsigned NumBuiltin = 0, NumPointer = 0, NumArray = 0, NumFunctionP = 0; + unsigned NumFunctionNP = 0, NumTypeName = 0, NumTagged = 0, NumReference = 0; + + unsigned NumTagStruct = 0, NumTagUnion = 0, NumTagEnum = 0, NumTagClass = 0; + + for (unsigned i = 0, e = Types.size(); i != e; ++i) { + Type *T = Types[i]; + if (isa<BuiltinType>(T)) + ++NumBuiltin; + else if (isa<PointerType>(T)) + ++NumPointer; + else if (isa<ReferenceType>(T)) + ++NumReference; + else if (isa<ArrayType>(T)) + ++NumArray; + else if (isa<FunctionTypeNoProto>(T)) + ++NumFunctionNP; + else if (isa<FunctionTypeProto>(T)) + ++NumFunctionP; + else if (isa<TypedefType>(T)) + ++NumTypeName; + else if (TagType *TT = dyn_cast<TagType>(T)) { + ++NumTagged; + switch (TT->getDecl()->getKind()) { + default: assert(0 && "Unknown tagged type!"); + case Decl::Struct: ++NumTagStruct; break; + case Decl::Union: ++NumTagUnion; break; + case Decl::Class: ++NumTagClass; break; + case Decl::Enum: ++NumTagEnum; break; + } + } else { + assert(0 && "Unknown type!"); + } + } + + fprintf(stderr, " %d builtin types\n", NumBuiltin); + fprintf(stderr, " %d pointer types\n", NumPointer); + fprintf(stderr, " %d reference types\n", NumReference); + fprintf(stderr, " %d array types\n", NumArray); + fprintf(stderr, " %d function types with proto\n", NumFunctionP); + fprintf(stderr, " %d function types with no proto\n", NumFunctionNP); + fprintf(stderr, " %d typename (typedef) types\n", NumTypeName); + fprintf(stderr, " %d tagged types\n", NumTagged); + fprintf(stderr, " %d struct types\n", NumTagStruct); + fprintf(stderr, " %d union types\n", NumTagUnion); + fprintf(stderr, " %d class types\n", NumTagClass); + fprintf(stderr, " %d enum types\n", NumTagEnum); + fprintf(stderr, 
"Total bytes = %d\n", int(NumBuiltin*sizeof(BuiltinType)+ + NumPointer*sizeof(PointerType)+NumArray*sizeof(ArrayType)+ + NumFunctionP*sizeof(FunctionTypeProto)+ + NumFunctionNP*sizeof(FunctionTypeNoProto)+ + NumTypeName*sizeof(TypedefType)+NumTagged*sizeof(TagType))); +} + + +void ASTContext::InitBuiltinType(QualType &R, BuiltinType::Kind K) { + Types.push_back((R = QualType(new BuiltinType(K),0)).getTypePtr()); +} + + +void ASTContext::InitBuiltinTypes() { + assert(VoidTy.isNull() && "Context reinitialized?"); + + // C99 6.2.5p19. + InitBuiltinType(VoidTy, BuiltinType::Void); + + // C99 6.2.5p2. + InitBuiltinType(BoolTy, BuiltinType::Bool); + // C99 6.2.5p3. + if (Target.isCharSigned(SourceLocation())) + InitBuiltinType(CharTy, BuiltinType::Char_S); + else + InitBuiltinType(CharTy, BuiltinType::Char_U); + // C99 6.2.5p4. + InitBuiltinType(SignedCharTy, BuiltinType::SChar); + InitBuiltinType(ShortTy, BuiltinType::Short); + InitBuiltinType(IntTy, BuiltinType::Int); + InitBuiltinType(LongTy, BuiltinType::Long); + InitBuiltinType(LongLongTy, BuiltinType::LongLong); + + // C99 6.2.5p6. + InitBuiltinType(UnsignedCharTy, BuiltinType::UChar); + InitBuiltinType(UnsignedShortTy, BuiltinType::UShort); + InitBuiltinType(UnsignedIntTy, BuiltinType::UInt); + InitBuiltinType(UnsignedLongTy, BuiltinType::ULong); + InitBuiltinType(UnsignedLongLongTy, BuiltinType::ULongLong); + + // C99 6.2.5p10. + InitBuiltinType(FloatTy, BuiltinType::Float); + InitBuiltinType(DoubleTy, BuiltinType::Double); + InitBuiltinType(LongDoubleTy, BuiltinType::LongDouble); + + // C99 6.2.5p11. + FloatComplexTy = getComplexType(FloatTy); + DoubleComplexTy = getComplexType(DoubleTy); + LongDoubleComplexTy = getComplexType(LongDoubleTy); +} + +/// getComplexType - Return the uniqued reference to the type for a complex +/// number with the specified element type. +QualType ASTContext::getComplexType(QualType T) { + // Unique pointers, to guarantee there is only one pointer of a particular + // structure. 
+ llvm::FoldingSetNodeID ID; + ComplexType::Profile(ID, T); + + void *InsertPos = 0; + if (ComplexType *CT = ComplexTypes.FindNodeOrInsertPos(ID, InsertPos)) + return QualType(CT, 0); + + // If the pointee type isn't canonical, this won't be a canonical type either, + // so fill in the canonical type field. + QualType Canonical; + if (!T->isCanonical()) { + Canonical = getComplexType(T.getCanonicalType()); + + // Get the new insert position for the node we care about. + ComplexType *NewIP = ComplexTypes.FindNodeOrInsertPos(ID, InsertPos); + assert(NewIP == 0 && "Shouldn't be in the map!"); + } + ComplexType *New = new ComplexType(T, Canonical); + Types.push_back(New); + ComplexTypes.InsertNode(New, InsertPos); + return QualType(New, 0); +} + + +/// getPointerType - Return the uniqued reference to the type for a pointer to +/// the specified type. +QualType ASTContext::getPointerType(QualType T) { + // Unique pointers, to guarantee there is only one pointer of a particular + // structure. + llvm::FoldingSetNodeID ID; + PointerType::Profile(ID, T); + + void *InsertPos = 0; + if (PointerType *PT = PointerTypes.FindNodeOrInsertPos(ID, InsertPos)) + return QualType(PT, 0); + + // If the pointee type isn't canonical, this won't be a canonical type either, + // so fill in the canonical type field. + QualType Canonical; + if (!T->isCanonical()) { + Canonical = getPointerType(T.getCanonicalType()); + + // Get the new insert position for the node we care about. + PointerType *NewIP = PointerTypes.FindNodeOrInsertPos(ID, InsertPos); + assert(NewIP == 0 && "Shouldn't be in the map!"); + } + PointerType *New = new PointerType(T, Canonical); + Types.push_back(New); + PointerTypes.InsertNode(New, InsertPos); + return QualType(New, 0); +} + +/// getReferenceType - Return the uniqued reference to the type for a reference +/// to the specified type. 
+QualType ASTContext::getReferenceType(QualType T) { + // Unique pointers, to guarantee there is only one pointer of a particular + // structure. + llvm::FoldingSetNodeID ID; + ReferenceType::Profile(ID, T); + + void *InsertPos = 0; + if (ReferenceType *RT = ReferenceTypes.FindNodeOrInsertPos(ID, InsertPos)) + return QualType(RT, 0); + + // If the referencee type isn't canonical, this won't be a canonical type + // either, so fill in the canonical type field. + QualType Canonical; + if (!T->isCanonical()) { + Canonical = getReferenceType(T.getCanonicalType()); + + // Get the new insert position for the node we care about. + ReferenceType *NewIP = ReferenceTypes.FindNodeOrInsertPos(ID, InsertPos); + assert(NewIP == 0 && "Shouldn't be in the map!"); + } + + ReferenceType *New = new ReferenceType(T, Canonical); + Types.push_back(New); + ReferenceTypes.InsertNode(New, InsertPos); + return QualType(New, 0); +} + +/// getArrayType - Return the unique reference to the type for an array of the +/// specified element type. +QualType ASTContext::getArrayType(QualType EltTy,ArrayType::ArraySizeModifier ASM, + unsigned EltTypeQuals, Expr *NumElts) { + // Unique array types, to guarantee there is only one array of a particular + // structure. + llvm::FoldingSetNodeID ID; + ArrayType::Profile(ID, ASM, EltTypeQuals, EltTy, NumElts); + + void *InsertPos = 0; + if (ArrayType *ATP = ArrayTypes.FindNodeOrInsertPos(ID, InsertPos)) + return QualType(ATP, 0); + + // If the element type isn't canonical, this won't be a canonical type either, + // so fill in the canonical type field. + QualType Canonical; + if (!EltTy->isCanonical()) { + Canonical = getArrayType(EltTy.getCanonicalType(), ASM, EltTypeQuals, + NumElts); + + // Get the new insert position for the node we care about. 
+ ArrayType *NewIP = ArrayTypes.FindNodeOrInsertPos(ID, InsertPos); + assert(NewIP == 0 && "Shouldn't be in the map!"); + } + + ArrayType *New = new ArrayType(EltTy, ASM, EltTypeQuals, Canonical, NumElts); + ArrayTypes.InsertNode(New, InsertPos); + Types.push_back(New); + return QualType(New, 0); +} + +/// convertToVectorType - Return the unique reference to a vector type of +/// the specified element type and size. VectorType can be a pointer, array, +/// function, or built-in type (i.e. _Bool, integer, or float). +QualType ASTContext::convertToVectorType(QualType vecType, unsigned NumElts) { + BuiltinType *baseType; + + baseType = dyn_cast<BuiltinType>(vecType.getCanonicalType().getTypePtr()); + assert(baseType != 0 && + "convertToVectorType(): Complex vector types unimplemented"); + + // Check if we've already instantiated a vector of this type. + llvm::FoldingSetNodeID ID; + VectorType::Profile(ID, vecType, NumElts); + void *InsertPos = 0; + if (VectorType *VTP = VectorTypes.FindNodeOrInsertPos(ID, InsertPos)) + return QualType(VTP, 0); + + // If the element type isn't canonical, this won't be a canonical type either, + // so fill in the canonical type field. + QualType Canonical; + if (!vecType->isCanonical()) { + Canonical = convertToVectorType(vecType.getCanonicalType(), NumElts); + + // Get the new insert position for the node we care about. + VectorType *NewIP = VectorTypes.FindNodeOrInsertPos(ID, InsertPos); + assert(NewIP == 0 && "Shouldn't be in the map!"); + } + VectorType *New = new VectorType(vecType, NumElts, Canonical); + VectorTypes.InsertNode(New, InsertPos); + Types.push_back(New); + return QualType(New, 0); +} + +/// getFunctionTypeNoProto - Return a K&R style C function type like 'int()'. +/// +QualType ASTContext::getFunctionTypeNoProto(QualType ResultTy) { + // Unique functions, to guarantee there is only one function of a particular + // structure. 
+ llvm::FoldingSetNodeID ID; + FunctionTypeNoProto::Profile(ID, ResultTy); + + void *InsertPos = 0; + if (FunctionTypeNoProto *FT = + FunctionTypeNoProtos.FindNodeOrInsertPos(ID, InsertPos)) + return QualType(FT, 0); + + QualType Canonical; + if (!ResultTy->isCanonical()) { + Canonical = getFunctionTypeNoProto(ResultTy.getCanonicalType()); + + // Get the new insert position for the node we care about. + FunctionTypeNoProto *NewIP = + FunctionTypeNoProtos.FindNodeOrInsertPos(ID, InsertPos); + assert(NewIP == 0 && "Shouldn't be in the map!"); + } + + FunctionTypeNoProto *New = new FunctionTypeNoProto(ResultTy, Canonical); + Types.push_back(New); + FunctionTypeProtos.InsertNode(New, InsertPos); + return QualType(New, 0); +} + +/// getFunctionType - Return a normal function type with a typed argument +/// list. isVariadic indicates whether the argument list includes '...'. +QualType ASTContext::getFunctionType(QualType ResultTy, QualType *ArgArray, + unsigned NumArgs, bool isVariadic) { + // Unique functions, to guarantee there is only one function of a particular + // structure. + llvm::FoldingSetNodeID ID; + FunctionTypeProto::Profile(ID, ResultTy, ArgArray, NumArgs, isVariadic); + + void *InsertPos = 0; + if (FunctionTypeProto *FTP = + FunctionTypeProtos.FindNodeOrInsertPos(ID, InsertPos)) + return QualType(FTP, 0); + + // Determine whether the type being created is already canonical or not. + bool isCanonical = ResultTy->isCanonical(); + for (unsigned i = 0; i != NumArgs && isCanonical; ++i) + if (!ArgArray[i]->isCanonical()) + isCanonical = false; + + // If this type isn't canonical, get the canonical version of it. 
+ QualType Canonical; + if (!isCanonical) { + llvm::SmallVector<QualType, 16> CanonicalArgs; + CanonicalArgs.reserve(NumArgs); + for (unsigned i = 0; i != NumArgs; ++i) + CanonicalArgs.push_back(ArgArray[i].getCanonicalType()); + + Canonical = getFunctionType(ResultTy.getCanonicalType(), + &CanonicalArgs[0], NumArgs, + isVariadic); + + // Get the new insert position for the node we care about. + FunctionTypeProto *NewIP = + FunctionTypeProtos.FindNodeOrInsertPos(ID, InsertPos); + assert(NewIP == 0 && "Shouldn't be in the map!"); + } + + // FunctionTypeProto objects are not allocated with new because they have a + // variable size array (for parameter types) at the end of them. + FunctionTypeProto *FTP = + (FunctionTypeProto*)malloc(sizeof(FunctionTypeProto) + + (NumArgs-1)*sizeof(QualType)); + new (FTP) FunctionTypeProto(ResultTy, ArgArray, NumArgs, isVariadic, + Canonical); + Types.push_back(FTP); + FunctionTypeProtos.InsertNode(FTP, InsertPos); + return QualType(FTP, 0); +} + +/// getTypedefType - Return the unique reference to the type for the +/// specified typename decl. +QualType ASTContext::getTypedefType(TypedefDecl *Decl) { + if (Decl->TypeForDecl) return QualType(Decl->TypeForDecl, 0); + + QualType Canonical = Decl->getUnderlyingType().getCanonicalType(); + Decl->TypeForDecl = new TypedefType(Decl, Canonical); + Types.push_back(Decl->TypeForDecl); + return QualType(Decl->TypeForDecl, 0); +} + +/// getTagDeclType - Return the unique reference to the type for the +/// specified TagDecl (struct/union/class/enum) decl. +QualType ASTContext::getTagDeclType(TagDecl *Decl) { + // The decl stores the type cache. + if (Decl->TypeForDecl) return QualType(Decl->TypeForDecl, 0); + + Decl->TypeForDecl = new TagType(Decl, QualType()); + Types.push_back(Decl->TypeForDecl); + return QualType(Decl->TypeForDecl, 0); +} + +/// getSizeType - Return the unique type for "size_t" (C99 7.17), the result +/// of the sizeof operator (C99 6.5.3.4p4). 
The value is target dependent and +/// needs to agree with the definition in <stddef.h>. +QualType ASTContext::getSizeType() const { + // On Darwin, size_t is defined as a "long unsigned int". + // FIXME: should derive from "Target". + return UnsignedLongTy; +} + +/// getIntegerBitwidth - Return the bitwidth of the specified integer type +/// according to the target. 'Loc' specifies the source location that +/// requires evaluation of this property. +unsigned ASTContext::getIntegerBitwidth(QualType T, SourceLocation Loc) { + if (const TagType *TT = dyn_cast<TagType>(T.getCanonicalType())) { + assert(TT->getDecl()->getKind() == Decl::Enum && "not an int or enum"); + assert(0 && "FIXME: getIntegerBitwidth(enum) unimplemented!"); + } + + const BuiltinType *BT = cast<BuiltinType>(T.getCanonicalType()); + switch (BT->getKind()) { + default: assert(0 && "getIntegerBitwidth(): not a built-in integer"); + case BuiltinType::Bool: return Target.getBoolWidth(Loc); + case BuiltinType::Char_S: + case BuiltinType::Char_U: + case BuiltinType::SChar: + case BuiltinType::UChar: return Target.getCharWidth(Loc); + case BuiltinType::Short: + case BuiltinType::UShort: return Target.getShortWidth(Loc); + case BuiltinType::Int: + case BuiltinType::UInt: return Target.getIntWidth(Loc); + case BuiltinType::Long: + case BuiltinType::ULong: return Target.getLongWidth(Loc); + case BuiltinType::LongLong: + case BuiltinType::ULongLong: return Target.getLongLongWidth(Loc); + } +} + +/// getIntegerRank - Return an integer conversion rank (C99 6.3.1.1p1). This +/// routine will assert if passed a built-in type that isn't an integer or enum. 
+static int getIntegerRank(QualType t) { + if (const TagType *TT = dyn_cast<TagType>(t.getCanonicalType())) { + assert(TT->getDecl()->getKind() == Decl::Enum && "not an int or enum"); + return 4; + } + + const BuiltinType *BT = cast<BuiltinType>(t.getCanonicalType()); + switch (BT->getKind()) { + default: + assert(0 && "getIntegerRank(): not a built-in integer"); + case BuiltinType::Bool: + return 1; + case BuiltinType::Char_S: + case BuiltinType::Char_U: + case BuiltinType::SChar: + case BuiltinType::UChar: + return 2; + case BuiltinType::Short: + case BuiltinType::UShort: + return 3; + case BuiltinType::Int: + case BuiltinType::UInt: + return 4; + case BuiltinType::Long: + case BuiltinType::ULong: + return 5; + case BuiltinType::LongLong: + case BuiltinType::ULongLong: + return 6; + } +} + +/// getFloatingRank - Return a relative rank for floating point types. +/// This routine will assert if passed a built-in type that isn't a float. +static int getFloatingRank(QualType T) { + T = T.getCanonicalType(); + if (ComplexType *CT = dyn_cast<ComplexType>(T)) + return getFloatingRank(CT->getElementType()); + + switch (cast<BuiltinType>(T)->getKind()) { + default: assert(0 && "getFloatingPointRank(): not a floating type"); + case BuiltinType::Float: return FloatRank; + case BuiltinType::Double: return DoubleRank; + case BuiltinType::LongDouble: return LongDoubleRank; + } +} + +// maxComplexType - the following code handles 3 different combinations: +// complex/complex, complex/float, float/complex. +// When both operands are complex, the shorter operand is converted to the +// type of the longer, and that is the type of the result. This corresponds +// to what is done when combining two real floating-point operands. +// The fun begins when size promotion occur across type domains. g +// getFloatingRank & convertFloatingRankToComplexType handle this without +// enumerating all permutations. +// It also allows us to add new types without breakage. 
+// From H&S 6.3.4: When one operand is complex and the other is a real +// floating-point type, the less precise type is converted, within it's +// real or complex domain, to the precision of the other type. For example, +// when combining a "long double" with a "double _Complex", the +// "double _Complex" is promoted to "long double _Complex". + +QualType ASTContext::maxComplexType(QualType lt, QualType rt) const { + switch (std::max(getFloatingRank(lt), getFloatingRank(rt))) { + default: assert(0 && "convertRankToComplex(): illegal value for rank"); + case FloatRank: return FloatComplexTy; + case DoubleRank: return DoubleComplexTy; + case LongDoubleRank: return LongDoubleComplexTy; + } +} + +// maxFloatingType - handles the simple case, both operands are floats. +QualType ASTContext::maxFloatingType(QualType lt, QualType rt) { + return getFloatingRank(lt) > getFloatingRank(rt) ? lt : rt; +} + +// maxIntegerType - Returns the highest ranked integer type. Handles 3 case: +// unsigned/unsigned, signed/signed, signed/unsigned. C99 6.3.1.8p1. +QualType ASTContext::maxIntegerType(QualType lhs, QualType rhs) { + if (lhs == rhs) return lhs; + + bool t1Unsigned = lhs->isUnsignedIntegerType(); + bool t2Unsigned = rhs->isUnsignedIntegerType(); + + if ((t1Unsigned && t2Unsigned) || (!t1Unsigned && !t2Unsigned)) + return getIntegerRank(lhs) >= getIntegerRank(rhs) ? lhs : rhs; + + // We have two integer types with differing signs + QualType unsignedType = t1Unsigned ? lhs : rhs; + QualType signedType = t1Unsigned ? rhs : lhs; + + if (getIntegerRank(unsignedType) >= getIntegerRank(signedType)) + return unsignedType; + else { + // FIXME: Need to check if the signed type can represent all values of the + // unsigned type. If it can, then the result is the signed type. + // If it can't, then the result is the unsigned version of the signed type. + // Should probably add a helper that returns a signed integer type from + // an unsigned (and vice versa). C99 6.3.1.8. 
+ return signedType; + } +} diff --git a/AST/Builtins.cpp b/AST/Builtins.cpp new file mode 100644 index 0000000000..454085bf2a --- /dev/null +++ b/AST/Builtins.cpp @@ -0,0 +1,125 @@ +//===--- Builtins.cpp - Builtin function implementation -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements various things for builtin functions. +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/Builtins.h" +#include "clang/AST/ASTContext.h" +#include "clang/Lex/IdentifierTable.h" +#include "clang/Basic/TargetInfo.h" +using namespace clang; + +static const Builtin::Info BuiltinInfo[] = { + { "not a builtin function", 0, 0 }, +#define BUILTIN(ID, TYPE, ATTRS) { #ID, TYPE, ATTRS }, +#include "clang/AST/Builtins.def" +}; + +const Builtin::Info &Builtin::Context::GetRecord(unsigned ID) const { + if (ID < Builtin::FirstTSBuiltin) + return BuiltinInfo[ID]; + assert(ID - Builtin::FirstTSBuiltin < NumTSRecords && "Invalid builtin ID!"); + return TSRecords[ID - Builtin::FirstTSBuiltin]; +} + + +/// InitializeBuiltins - Mark the identifiers for all the builtins with their +/// appropriate builtin ID # and mark any non-portable builtin identifiers as +/// such. +void Builtin::Context::InitializeBuiltins(IdentifierTable &Table, + const TargetInfo &Target) { + // Step #1: mark all target-independent builtins with their ID's. + for (unsigned i = Builtin::NotBuiltin+1; i != Builtin::FirstTSBuiltin; ++i) + Table.get(BuiltinInfo[i].Name).setBuiltinID(i); + + // Step #2: handle target builtins. + std::vector<const char *> NonPortableBuiltins; + Target.getTargetBuiltins(TSRecords, NumTSRecords, NonPortableBuiltins); + + // Step #2a: Register target-specific builtins. 
+ for (unsigned i = 0, e = NumTSRecords; i != e; ++i) + Table.get(TSRecords[i].Name).setBuiltinID(i+Builtin::FirstTSBuiltin); + + // Step #2b: Mark non-portable builtins as such. + for (unsigned i = 0, e = NonPortableBuiltins.size(); i != e; ++i) + Table.get(NonPortableBuiltins[i]).setNonPortableBuiltin(true); +} + +/// DecodeTypeFromStr - This decodes one type descriptor from Str, advancing the +/// pointer over the consumed characters. This returns the resultant type. +static QualType DecodeTypeFromStr(const char *&Str, ASTContext &Context) { + // Modifiers. + bool Long = false, LongLong = false, Signed = false, Unsigned = false; + + // Read the modifiers first. + bool Done = false; + while (!Done) { + switch (*Str++) { + default: Done = true; --Str; break; + case 'S': + assert(!Unsigned && "Can't use both 'S' and 'U' modifiers!"); + assert(!Signed && "Can't use 'S' modifier multiple times!"); + Signed = true; + break; + case 'U': + assert(!Signed && "Can't use both 'S' and 'U' modifiers!"); + assert(!Unsigned && "Can't use 'S' modifier multiple times!"); + Unsigned = true; + break; + case 'L': + assert(!LongLong && "Can't have LLL modifier"); + if (Long) + LongLong = true; + else + Long = true; + break; + } + } + + // Read the base type. + switch (*Str++) { + default: assert(0 && "Unknown builtin type letter!"); + case 'v': + assert(!Long && !Signed && !Unsigned && "Bad modifiers used with 'f'!"); + return Context.VoidTy; + case 'f': + assert(!Long && !Signed && !Unsigned && "Bad modifiers used with 'f'!"); + return Context.FloatTy; + case 'd': + assert(!LongLong && !Signed && !Unsigned && "Bad modifiers used with 'd'!"); + if (Long) + return Context.LongDoubleTy; + return Context.DoubleTy; + case 's': + assert(!LongLong && "Bad modifiers used with 's'!"); + if (Unsigned) + return Context.UnsignedShortTy; + return Context.ShortTy; + //case 'i': + } +} + +/// GetBuiltinType - Return the type for the specified builtin. 
+QualType Builtin::Context::GetBuiltinType(unsigned id, ASTContext &Context)const{ + const char *TypeStr = GetRecord(id).Type; + + llvm::SmallVector<QualType, 8> ArgTypes; + + QualType ResType = DecodeTypeFromStr(TypeStr, Context); + while (TypeStr[0] && TypeStr[0] != '.') + ArgTypes.push_back(DecodeTypeFromStr(TypeStr, Context)); + + assert((TypeStr[0] != '.' || TypeStr[1] == 0) && + "'.' should only occur at end of builtin type list!"); + + return Context.getFunctionType(ResType, &ArgTypes[0], ArgTypes.size(), + TypeStr[0] == '.'); +} diff --git a/AST/Decl.cpp b/AST/Decl.cpp new file mode 100644 index 0000000000..22fbabbd30 --- /dev/null +++ b/AST/Decl.cpp @@ -0,0 +1,161 @@ +//===--- Decl.cpp - Declaration AST Node Implementation -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Decl class and subclasses. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/AST/Decl.h" +#include "clang/Lex/IdentifierTable.h" +using namespace clang; + +// temporary statistics gathering +static unsigned nFuncs = 0; +static unsigned nBlockVars = 0; +static unsigned nFileVars = 0; +static unsigned nParmVars = 0; +static unsigned nSUC = 0; +static unsigned nEnumConst = 0; +static unsigned nEnumDecls = 0; +static unsigned nTypedef = 0; +static unsigned nFieldDecls = 0; +static bool StatSwitch = false; + +bool Decl::CollectingStats(bool enable) { + if (enable) StatSwitch = true; + return StatSwitch; +} + +void Decl::PrintStats() { + fprintf(stderr, "*** Decl Stats:\n"); + fprintf(stderr, " %d decls total.\n", + int(nFuncs+nBlockVars+nFileVars+nParmVars+nFieldDecls+nSUC+ + nEnumDecls+nEnumConst+nTypedef)); + fprintf(stderr, " %d function decls, %d each (%d bytes)\n", + nFuncs, (int)sizeof(FunctionDecl), int(nFuncs*sizeof(FunctionDecl))); + fprintf(stderr, " %d block variable decls, %d each (%d bytes)\n", + nBlockVars, (int)sizeof(BlockVarDecl), + int(nBlockVars*sizeof(BlockVarDecl))); + fprintf(stderr, " %d file variable decls, %d each (%d bytes)\n", + nFileVars, (int)sizeof(FileVarDecl), + int(nFileVars*sizeof(FileVarDecl))); + fprintf(stderr, " %d parameter variable decls, %d each (%d bytes)\n", + nParmVars, (int)sizeof(ParmVarDecl), + int(nParmVars*sizeof(ParmVarDecl))); + fprintf(stderr, " %d field decls, %d each (%d bytes)\n", + nFieldDecls, (int)sizeof(FieldDecl), + int(nFieldDecls*sizeof(FieldDecl))); + fprintf(stderr, " %d struct/union/class decls, %d each (%d bytes)\n", + nSUC, (int)sizeof(RecordDecl), + int(nSUC*sizeof(RecordDecl))); + fprintf(stderr, " %d enum decls, %d each (%d bytes)\n", + nEnumDecls, (int)sizeof(EnumDecl), + int(nEnumDecls*sizeof(EnumDecl))); + fprintf(stderr, " %d enum constant decls, %d each (%d bytes)\n", + nEnumConst, (int)sizeof(EnumConstantDecl), + int(nEnumConst*sizeof(EnumConstantDecl))); + 
fprintf(stderr, " %d typedef decls, %d each (%d bytes)\n", + nTypedef, (int)sizeof(TypedefDecl),int(nTypedef*sizeof(TypedefDecl))); + fprintf(stderr, "Total bytes = %d\n", + int(nFuncs*sizeof(FunctionDecl)+nBlockVars*sizeof(BlockVarDecl)+ + nFileVars*sizeof(FileVarDecl)+nParmVars*sizeof(ParmVarDecl)+ + nFieldDecls*sizeof(FieldDecl)+nSUC*sizeof(RecordDecl)+ + nEnumDecls*sizeof(EnumDecl)+nEnumConst*sizeof(EnumConstantDecl)+ + nTypedef*sizeof(TypedefDecl))); +} + +void Decl::addDeclKind(const Kind k) { + switch (k) { + case Typedef: + nTypedef++; + break; + case Function: + nFuncs++; + break; + case BlockVariable: + nBlockVars++; + break; + case FileVariable: + nFileVars++; + break; + case ParmVariable: + nParmVars++; + break; + case EnumConstant: + nEnumConst++; + break; + case Field: + nFieldDecls++; + break; + case Struct: + case Union: + case Class: + nSUC++; + break; + case Enum: + nEnumDecls++; + break; + } +} + +// Out-of-line virtual method providing a home for Decl. +Decl::~Decl() { +} + +const char *Decl::getName() const { + if (const IdentifierInfo *II = getIdentifier()) + return II->getName(); + return ""; +} + + +FunctionDecl::~FunctionDecl() { + delete[] ParamInfo; +} + +unsigned FunctionDecl::getNumParams() const { + return cast<FunctionTypeProto>(getType().getTypePtr())->getNumArgs(); +} + +void FunctionDecl::setParams(ParmVarDecl **NewParamInfo, unsigned NumParams) { + assert(ParamInfo == 0 && "Already has param info!"); + assert(NumParams == getNumParams() && "Parameter count mismatch!"); + + // Zero params -> null pointer. + if (NumParams) { + ParamInfo = new ParmVarDecl*[NumParams]; + memcpy(ParamInfo, NewParamInfo, sizeof(ParmVarDecl*)*NumParams); + } +} + + +/// defineBody - When created, RecordDecl's correspond to a forward declared +/// record. This method is used to mark the decl as being defined, with the +/// specified contents. 
+void RecordDecl::defineBody(FieldDecl **members, unsigned numMembers) { + assert(!isDefinition() && "Cannot redefine record!"); + setDefinition(true); + NumMembers = numMembers; + if (numMembers) { + Members = new FieldDecl*[numMembers]; + memcpy(Members, members, numMembers*sizeof(Decl*)); + } +} + +FieldDecl* RecordDecl::getMember(IdentifierInfo *name) { + if (Members == 0 || NumMembers < 0) + return 0; + + // linear search. When C++ classes come along, will likely need to revisit. + for (int i = 0; i < NumMembers; ++i) { + if (Members[i]->getIdentifier() == name) + return Members[i]; + } + return 0; +}
\ No newline at end of file diff --git a/AST/Expr.cpp b/AST/Expr.cpp new file mode 100644 index 0000000000..b7dbcc7de6 --- /dev/null +++ b/AST/Expr.cpp @@ -0,0 +1,536 @@ +//===--- Expr.cpp - Expression AST Node Implementation --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Expr class and subclasses. +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/Expr.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/Lex/IdentifierTable.h" +using namespace clang; + +//===----------------------------------------------------------------------===// +// Primary Expressions. +//===----------------------------------------------------------------------===// + +StringLiteral::StringLiteral(const char *strData, unsigned byteLength, + bool Wide, QualType t, SourceLocation firstLoc, + SourceLocation lastLoc) : + Expr(StringLiteralClass, t) { + // OPTIMIZE: could allocate this appended to the StringLiteral. + char *AStrData = new char[byteLength]; + memcpy(AStrData, strData, byteLength); + StrData = AStrData; + ByteLength = byteLength; + IsWide = Wide; + firstTokLoc = firstLoc; + lastTokLoc = lastLoc; +} + +StringLiteral::~StringLiteral() { + delete[] StrData; +} + +bool UnaryOperator::isPostfix(Opcode Op) { + switch (Op) { + case PostInc: + case PostDec: + return true; + default: + return false; + } +} + +/// getOpcodeStr - Turn an Opcode enum value into the punctuation char it +/// corresponds to, e.g. "sizeof" or "[pre]++". 
+const char *UnaryOperator::getOpcodeStr(Opcode Op) { + switch (Op) { + default: assert(0 && "Unknown unary operator"); + case PostInc: return "++"; + case PostDec: return "--"; + case PreInc: return "++"; + case PreDec: return "--"; + case AddrOf: return "&"; + case Deref: return "*"; + case Plus: return "+"; + case Minus: return "-"; + case Not: return "~"; + case LNot: return "!"; + case Real: return "__real"; + case Imag: return "__imag"; + case SizeOf: return "sizeof"; + case AlignOf: return "alignof"; + case Extension: return "__extension__"; + } +} + +//===----------------------------------------------------------------------===// +// Postfix Operators. +//===----------------------------------------------------------------------===// + +CallExpr::CallExpr(Expr *fn, Expr **args, unsigned numargs, QualType t, + SourceLocation rparenloc) + : Expr(CallExprClass, t), Fn(fn), NumArgs(numargs) { + Args = new Expr*[numargs]; + for (unsigned i = 0; i != numargs; ++i) + Args[i] = args[i]; + RParenLoc = rparenloc; +} + +/// getOpcodeStr - Turn an Opcode enum value into the punctuation char it +/// corresponds to, e.g. "<<=". 
+const char *BinaryOperator::getOpcodeStr(Opcode Op) { + switch (Op) { + default: assert(0 && "Unknown binary operator"); + case Mul: return "*"; + case Div: return "/"; + case Rem: return "%"; + case Add: return "+"; + case Sub: return "-"; + case Shl: return "<<"; + case Shr: return ">>"; + case LT: return "<"; + case GT: return ">"; + case LE: return "<="; + case GE: return ">="; + case EQ: return "=="; + case NE: return "!="; + case And: return "&"; + case Xor: return "^"; + case Or: return "|"; + case LAnd: return "&&"; + case LOr: return "||"; + case Assign: return "="; + case MulAssign: return "*="; + case DivAssign: return "/="; + case RemAssign: return "%="; + case AddAssign: return "+="; + case SubAssign: return "-="; + case ShlAssign: return "<<="; + case ShrAssign: return ">>="; + case AndAssign: return "&="; + case XorAssign: return "^="; + case OrAssign: return "|="; + case Comma: return ","; + } +} + + +//===----------------------------------------------------------------------===// +// Generic Expression Routines +//===----------------------------------------------------------------------===// + +/// hasLocalSideEffect - Return true if this immediate expression has side +/// effects, not counting any sub-expressions. +bool Expr::hasLocalSideEffect() const { + switch (getStmtClass()) { + default: + return false; + case ParenExprClass: + return cast<ParenExpr>(this)->getSubExpr()->hasLocalSideEffect(); + case UnaryOperatorClass: { + const UnaryOperator *UO = cast<UnaryOperator>(this); + + switch (UO->getOpcode()) { + default: return false; + case UnaryOperator::PostInc: + case UnaryOperator::PostDec: + case UnaryOperator::PreInc: + case UnaryOperator::PreDec: + return true; // ++/-- + + case UnaryOperator::Deref: + // Dereferencing a volatile pointer is a side-effect. + return getType().isVolatileQualified(); + case UnaryOperator::Real: + case UnaryOperator::Imag: + // accessing a piece of a volatile complex is a side-effect. 
+ return UO->getSubExpr()->getType().isVolatileQualified(); + + case UnaryOperator::Extension: + return UO->getSubExpr()->hasLocalSideEffect(); + } + } + case BinaryOperatorClass: + return cast<BinaryOperator>(this)->isAssignmentOp(); + + case MemberExprClass: + case ArraySubscriptExprClass: + // If the base pointer or element is to a volatile pointer/field, accessing + // if is a side effect. + return getType().isVolatileQualified(); + + case CallExprClass: + // TODO: check attributes for pure/const. "void foo() { strlen("bar"); }" + // should warn. + return true; + + case CastExprClass: + // If this is a cast to void, check the operand. Otherwise, the result of + // the cast is unused. + if (getType()->isVoidType()) + return cast<CastExpr>(this)->getSubExpr()->hasLocalSideEffect(); + return false; + } +} + +/// isLvalue - C99 6.3.2.1: an lvalue is an expression with an object type or an +/// incomplete type other than void. Nonarray expressions that can be lvalues: +/// - name, where name must be a variable +/// - e[i] +/// - (e), where e must be an lvalue +/// - e.name, where e must be an lvalue +/// - e->name +/// - *e, the type of e cannot be a function type +/// - string-constant +/// +Expr::isLvalueResult Expr::isLvalue() { + // first, check the type (C99 6.3.2.1) + if (isa<FunctionType>(TR.getCanonicalType())) // from isObjectType() + return LV_NotObjectType; + + if (TR->isIncompleteType() && TR->isVoidType()) + return LV_IncompleteVoidType; + + // the type looks fine, now check the expression + switch (getStmtClass()) { + case StringLiteralClass: // C99 6.5.1p4 + case ArraySubscriptExprClass: // C99 6.5.3p4 (e1[e2] == (*((e1)+(e2)))) + // For vectors, make sure base is an lvalue (i.e. not a function call). 
+ if (cast<ArraySubscriptExpr>(this)->getBase()->getType()->isVectorType()) + return cast<ArraySubscriptExpr>(this)->getBase()->isLvalue(); + return LV_Valid; + case DeclRefExprClass: // C99 6.5.1p2 + if (isa<VarDecl>(cast<DeclRefExpr>(this)->getDecl())) + return LV_Valid; + break; + case MemberExprClass: // C99 6.5.2.3p4 + const MemberExpr *m = cast<MemberExpr>(this); + return m->isArrow() ? LV_Valid : m->getBase()->isLvalue(); + case UnaryOperatorClass: // C99 6.5.3p4 + if (cast<UnaryOperator>(this)->getOpcode() == UnaryOperator::Deref) + return LV_Valid; + break; + case ParenExprClass: // C99 6.5.1p5 + return cast<ParenExpr>(this)->getSubExpr()->isLvalue(); + default: + break; + } + return LV_InvalidExpression; +} + +/// isModifiableLvalue - C99 6.3.2.1: an lvalue that does not have array type, +/// does not have an incomplete type, does not have a const-qualified type, and +/// if it is a structure or union, does not have any member (including, +/// recursively, any member or element of all contained aggregates or unions) +/// with a const-qualified type. +Expr::isModifiableLvalueResult Expr::isModifiableLvalue() { + isLvalueResult lvalResult = isLvalue(); + + switch (lvalResult) { + case LV_Valid: break; + case LV_NotObjectType: return MLV_NotObjectType; + case LV_IncompleteVoidType: return MLV_IncompleteVoidType; + case LV_InvalidExpression: return MLV_InvalidExpression; + } + if (TR.isConstQualified()) + return MLV_ConstQualified; + if (TR->isArrayType()) + return MLV_ArrayType; + if (TR->isIncompleteType()) + return MLV_IncompleteType; + + if (const RecordType *r = dyn_cast<RecordType>(TR.getCanonicalType())) { + if (r->hasConstFields()) + return MLV_ConstQualified; + } + return MLV_Valid; +} + +/// isIntegerConstantExpr - this recursive routine will test if an expression is +/// an integer constant expression. Note: With the introduction of VLA's in +/// C99 the result of the sizeof operator is no longer always a constant +/// expression. 
The generalization of the wording to include any subexpression +/// that is not evaluated (C99 6.6p3) means that nonconstant subexpressions +/// can appear as operands to other operators (e.g. &&, ||, ?:). For instance, +/// "0 || f()" can be treated as a constant expression. In C90 this expression, +/// occurring in a context requiring a constant, would have been a constraint +/// violation. FIXME: This routine currently implements C90 semantics. +/// To properly implement C99 semantics this routine will need to evaluate +/// expressions involving operators previously mentioned. + +/// FIXME: Pass up a reason why! Invalid operation in i-c-e, division by zero, +/// comma, etc +/// +/// FIXME: This should ext-warn on overflow during evaluation! ISO C does not +/// permit this. +bool Expr::isIntegerConstantExpr(llvm::APSInt &Result, SourceLocation *Loc, + bool isEvaluated) const { + switch (getStmtClass()) { + default: + if (Loc) *Loc = getLocStart(); + return false; + case ParenExprClass: + return cast<ParenExpr>(this)->getSubExpr()-> + isIntegerConstantExpr(Result, Loc, isEvaluated); + case IntegerLiteralClass: + Result = cast<IntegerLiteral>(this)->getValue(); + break; + case CharacterLiteralClass: + // FIXME: This doesn't set the right width etc. + Result.zextOrTrunc(32); // FIXME: NOT RIGHT IN GENERAL. + Result = cast<CharacterLiteral>(this)->getValue(); + break; + case DeclRefExprClass: + if (const EnumConstantDecl *D = + dyn_cast<EnumConstantDecl>(cast<DeclRefExpr>(this)->getDecl())) { + Result = D->getInitVal(); + break; + } + if (Loc) *Loc = getLocStart(); + return false; + case UnaryOperatorClass: { + const UnaryOperator *Exp = cast<UnaryOperator>(this); + + // Get the operand value. If this is sizeof/alignof, do not evalute the + // operand. This affects C99 6.6p3. 
+ if (Exp->isSizeOfAlignOfOp()) isEvaluated = false; + if (!Exp->getSubExpr()->isIntegerConstantExpr(Result, Loc, isEvaluated)) + return false; + + switch (Exp->getOpcode()) { + // Address, indirect, pre/post inc/dec, etc are not valid constant exprs. + // See C99 6.6p3. + default: + if (Loc) *Loc = Exp->getOperatorLoc(); + return false; + case UnaryOperator::Extension: + return true; + case UnaryOperator::SizeOf: + case UnaryOperator::AlignOf: + // sizeof(vla) is not a constantexpr: C99 6.5.3.4p2. + if (!Exp->getSubExpr()->getType()->isConstantSizeType(Loc)) + return false; + + // FIXME: Evaluate sizeof/alignof. + Result.zextOrTrunc(32); // FIXME: NOT RIGHT IN GENERAL. + Result = 1; // FIXME: Obviously bogus + break; + case UnaryOperator::LNot: { + bool Val = Result != 0; + Result.zextOrTrunc(32); // FIXME: NOT RIGHT IN GENERAL. + Result = Val; + break; + } + case UnaryOperator::Plus: + // FIXME: Do usual unary promotions here! + break; + case UnaryOperator::Minus: + // FIXME: Do usual unary promotions here! + Result = -Result; + break; + case UnaryOperator::Not: + // FIXME: Do usual unary promotions here! + Result = ~Result; + break; + } + break; + } + case SizeOfAlignOfTypeExprClass: { + const SizeOfAlignOfTypeExpr *Exp = cast<SizeOfAlignOfTypeExpr>(this); + // alignof always evaluates to a constant. + if (Exp->isSizeOf() && !Exp->getArgumentType()->isConstantSizeType(Loc)) + return false; + + // FIXME: Evaluate sizeof/alignof. + Result.zextOrTrunc(32); // FIXME: NOT RIGHT IN GENERAL. + Result = 1; // FIXME: Obviously bogus + break; + } + case BinaryOperatorClass: { + const BinaryOperator *Exp = cast<BinaryOperator>(this); + + // The LHS of a constant expr is always evaluated and needed. + if (!Exp->getLHS()->isIntegerConstantExpr(Result, Loc, isEvaluated)) + return false; + + llvm::APSInt RHS(Result); + + // The short-circuiting &&/|| operators don't necessarily evaluate their + // RHS. Make sure to pass isEvaluated down correctly. 
+ if (Exp->isLogicalOp()) { + bool RHSEval; + if (Exp->getOpcode() == BinaryOperator::LAnd) + RHSEval = Result != 0; + else { + assert(Exp->getOpcode() == BinaryOperator::LOr &&"Unexpected logical"); + RHSEval = Result == 0; + } + + if (!Exp->getRHS()->isIntegerConstantExpr(RHS, Loc, + isEvaluated & RHSEval)) + return false; + } else { + if (!Exp->getRHS()->isIntegerConstantExpr(RHS, Loc, isEvaluated)) + return false; + } + + // FIXME: These should all do the standard promotions, etc. + switch (Exp->getOpcode()) { + default: + if (Loc) *Loc = getLocStart(); + return false; + case BinaryOperator::Mul: + Result *= RHS; + break; + case BinaryOperator::Div: + if (RHS == 0) { + if (!isEvaluated) break; + if (Loc) *Loc = getLocStart(); + return false; + } + Result /= RHS; + break; + case BinaryOperator::Rem: + if (RHS == 0) { + if (!isEvaluated) break; + if (Loc) *Loc = getLocStart(); + return false; + } + Result %= RHS; + break; + case BinaryOperator::Add: Result += RHS; break; + case BinaryOperator::Sub: Result -= RHS; break; + case BinaryOperator::Shl: + Result <<= RHS.getLimitedValue(Result.getBitWidth()-1); + break; + case BinaryOperator::Shr: + Result >>= RHS.getLimitedValue(Result.getBitWidth()-1); + break; + case BinaryOperator::LT: Result = Result < RHS; break; + case BinaryOperator::GT: Result = Result > RHS; break; + case BinaryOperator::LE: Result = Result <= RHS; break; + case BinaryOperator::GE: Result = Result >= RHS; break; + case BinaryOperator::EQ: Result = Result == RHS; break; + case BinaryOperator::NE: Result = Result != RHS; break; + case BinaryOperator::And: Result &= RHS; break; + case BinaryOperator::Xor: Result ^= RHS; break; + case BinaryOperator::Or: Result |= RHS; break; + case BinaryOperator::LAnd: + Result = Result != 0 && RHS != 0; + break; + case BinaryOperator::LOr: + Result = Result != 0 || RHS != 0; + break; + + case BinaryOperator::Comma: + // C99 6.6p3: "shall not contain assignment, ..., or comma operators, + // *except* when they 
are contained within a subexpression that is not + // evaluated". Note that Assignment can never happen due to constraints + // on the LHS subexpr, so we don't need to check it here. + if (isEvaluated) { + if (Loc) *Loc = getLocStart(); + return false; + } + + // The result of the constant expr is the RHS. + Result = RHS; + return true; + } + + assert(!Exp->isAssignmentOp() && "LHS can't be a constant expr!"); + break; + } + case CastExprClass: { + const CastExpr *Exp = cast<CastExpr>(this); + // C99 6.6p6: shall only convert arithmetic types to integer types. + if (!Exp->getSubExpr()->getType()->isArithmeticType() || + !Exp->getDestType()->isIntegerType()) { + if (Loc) *Loc = Exp->getSubExpr()->getLocStart(); + return false; + } + + // Handle simple integer->integer casts. + if (Exp->getSubExpr()->getType()->isIntegerType()) { + if (!Exp->getSubExpr()->isIntegerConstantExpr(Result, Loc, isEvaluated)) + return false; + // FIXME: do the conversion on Result. + break; + } + + // Allow floating constants that are the immediate operands of casts or that + // are parenthesized. + const Expr *Operand = Exp->getSubExpr(); + while (const ParenExpr *PE = dyn_cast<ParenExpr>(Operand)) + Operand = PE->getSubExpr(); + + if (const FloatingLiteral *FL = dyn_cast<FloatingLiteral>(Operand)) { + // FIXME: Evaluate this correctly! + Result = (int)FL->getValue(); + break; + } + if (Loc) *Loc = Operand->getLocStart(); + return false; + } + case ConditionalOperatorClass: { + const ConditionalOperator *Exp = cast<ConditionalOperator>(this); + + if (!Exp->getCond()->isIntegerConstantExpr(Result, Loc, isEvaluated)) + return false; + + const Expr *TrueExp = Exp->getLHS(); + const Expr *FalseExp = Exp->getRHS(); + if (Result == 0) std::swap(TrueExp, FalseExp); + + // Evaluate the false one first, discard the result. + if (!FalseExp->isIntegerConstantExpr(Result, Loc, false)) + return false; + // Evalute the true one, capture the result. 
+ if (!TrueExp->isIntegerConstantExpr(Result, Loc, isEvaluated)) + return false; + // FIXME: promotions on result. + break; + } + } + + // Cases that are valid constant exprs fall through to here. + Result.setIsUnsigned(getType()->isUnsignedIntegerType()); + return true; +} + + +/// isNullPointerConstant - C99 6.3.2.3p3 - Return true if this is either an +/// integer constant expression with the value zero, or if this is one that is +/// cast to void*. +bool Expr::isNullPointerConstant() const { + // Strip off a cast to void*, if it exists. + if (const CastExpr *CE = dyn_cast<CastExpr>(this)) { + // Check that it is a cast to void*. + if (const PointerType *PT = dyn_cast<PointerType>(CE->getType())) { + QualType Pointee = PT->getPointeeType(); + if (Pointee.getQualifiers() == 0 && Pointee->isVoidType() && // to void* + CE->getSubExpr()->getType()->isIntegerType()) // from int. + return CE->getSubExpr()->isNullPointerConstant(); + } + } else if (const ParenExpr *PE = dyn_cast<ParenExpr>(this)) { + // Accept ((void*)0) as a null pointer constant, as many other + // implementations do. + return PE->getSubExpr()->isNullPointerConstant(); + } + + // This expression must be an integer type. + if (!getType()->isIntegerType()) + return false; + + // If we have an integer constant expression, we need to *evaluate* it and + // test for the value 0. + llvm::APSInt Val(32); + return isIntegerConstantExpr(Val, 0, true) && Val == 0; +} diff --git a/AST/Makefile b/AST/Makefile new file mode 100644 index 0000000000..17abef6e73 --- /dev/null +++ b/AST/Makefile @@ -0,0 +1,22 @@ +##===- clang/AST/Makefile ----------------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file was developed by Chris Lattner and is distributed under +# the University of Illinois Open Source License. See LICENSE.TXT for details. 
+# +##===----------------------------------------------------------------------===## +# +# This implements the AST library for the C-Language front-end. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME := clangAST +BUILD_ARCHIVE = 1 +CXXFLAGS = -fno-rtti + +CPPFLAGS += -I$(PROJ_SRC_DIR)/../include + +include $(LEVEL)/Makefile.common + diff --git a/AST/Stmt.cpp b/AST/Stmt.cpp new file mode 100644 index 0000000000..e43f03c404 --- /dev/null +++ b/AST/Stmt.cpp @@ -0,0 +1,81 @@ +//===--- Stmt.cpp - Statement AST Node Implementation ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Stmt class and statement subclasses. +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/Stmt.h" +#include "clang/AST/ExprCXX.h" +#include "clang/AST/StmtVisitor.h" +#include "clang/Lex/IdentifierTable.h" +using namespace clang; + +// Implement all the AST node visit methods using the StmtNodes.def database. +#define STMT(N, CLASS, PARENT) \ +void CLASS::visit(StmtVisitor &V) { return V.Visit##CLASS(this); } + +STMT(0, Stmt, ) +#include "clang/AST/StmtNodes.def" + +static struct StmtClassNameTable { + int enumValue; + const char *className; + unsigned counter; + unsigned size; +} sNames[] = { +#define STMT(N, CLASS, PARENT) { N, #CLASS, 0, sizeof(CLASS) }, +#include "clang/AST/StmtNodes.def" + { 0, 0, 0, 0 } +}; + +const char *Stmt::getStmtClassName() const { + for (int i = 0; sNames[i].className; i++) { + if (sClass == sNames[i].enumValue) + return sNames[i].className; + } + return 0; // should never happen.... 
+} + +void Stmt::PrintStats() { + unsigned sum = 0; + fprintf(stderr, "*** Stmt/Expr Stats:\n"); + for (int i = 0; sNames[i].className; i++) { + sum += sNames[i].counter; + } + fprintf(stderr, " %d stmts/exprs total.\n", sum); + sum = 0; + for (int i = 0; sNames[i].className; i++) { + fprintf(stderr, " %d %s, %d each (%d bytes)\n", + sNames[i].counter, sNames[i].className, sNames[i].size, sNames[i].counter*sNames[i].size); + sum += sNames[i].counter*sNames[i].size; + } + fprintf(stderr, "Total bytes = %d\n", sum); +} + +void Stmt::addStmtClass(StmtClass s) { + for (int i = 0; sNames[i].className; i++) { + if (s == sNames[i].enumValue) + sNames[i].counter++; + } +} + +static bool StatSwitch = false; + +bool Stmt::CollectingStats(bool enable) { + if (enable) StatSwitch = true; + return StatSwitch; +} + + + +const char *LabelStmt::getName() const { + return getID()->getName(); +} + diff --git a/AST/StmtPrinter.cpp b/AST/StmtPrinter.cpp new file mode 100644 index 0000000000..e90b9f2c3f --- /dev/null +++ b/AST/StmtPrinter.cpp @@ -0,0 +1,436 @@ +//===--- StmtPrinter.cpp - Printing implementation for Stmt ASTs ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Stmt::dump/Stmt::print methods. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/AST/StmtVisitor.h" +#include "clang/AST/Decl.h" +#include "clang/AST/ExprCXX.h" +#include "clang/Lex/IdentifierTable.h" +#include "llvm/Support/Compiler.h" +#include <iostream> +using namespace clang; + +//===----------------------------------------------------------------------===// +// StmtPrinter Visitor +//===----------------------------------------------------------------------===// + +namespace { + class VISIBILITY_HIDDEN StmtPrinter : public StmtVisitor { + std::ostream &OS; + unsigned IndentLevel; + public: + StmtPrinter(std::ostream &os) : OS(os), IndentLevel(0) {} + + void PrintStmt(Stmt *S, int SubIndent = 1) { + IndentLevel += SubIndent; + if (S && isa<Expr>(S)) { + // If this is an expr used in a stmt context, indent and newline it. + Indent(); + S->visit(*this); + OS << ";\n"; + } else if (S) { + S->visit(*this); + } else { + Indent() << "<<<NULL STATEMENT>>>\n"; + } + IndentLevel -= SubIndent; + } + + void PrintRawCompoundStmt(CompoundStmt *S); + void PrintRawDecl(Decl *D); + void PrintRawIfStmt(IfStmt *If); + + void PrintExpr(Expr *E) { + if (E) + E->visit(*this); + else + OS << "<null expr>"; + } + + std::ostream &Indent(int Delta = 0) const { + for (int i = 0, e = IndentLevel+Delta; i < e; ++i) + OS << " "; + return OS; + } + + virtual void VisitStmt(Stmt *Node); +#define STMT(N, CLASS, PARENT) \ + virtual void Visit##CLASS(CLASS *Node); +#include "clang/AST/StmtNodes.def" + }; +} + +//===----------------------------------------------------------------------===// +// Stmt printing methods. +//===----------------------------------------------------------------------===// + +void StmtPrinter::VisitStmt(Stmt *Node) { + Indent() << "<<unknown stmt type>>\n"; +} + +/// PrintRawCompoundStmt - Print a compound stmt without indenting the {, and +/// with no newline after the }. 
+void StmtPrinter::PrintRawCompoundStmt(CompoundStmt *Node) { + OS << "{\n"; + for (CompoundStmt::body_iterator I = Node->body_begin(), E = Node->body_end(); + I != E; ++I) + PrintStmt(*I); + + Indent() << "}"; +} + +void StmtPrinter::PrintRawDecl(Decl *D) { + // FIXME: Need to complete/beautify this... this code simply shows the + // nodes are where they need to be. + if (TypedefDecl *localType = dyn_cast<TypedefDecl>(D)) { + OS << "typedef " << localType->getUnderlyingType().getAsString(); + OS << " " << localType->getName(); + } else if (ValueDecl *VD = dyn_cast<ValueDecl>(D)) { + // Emit storage class for vardecls. + if (VarDecl *V = dyn_cast<VarDecl>(VD)) { + switch (V->getStorageClass()) { + default: assert(0 && "Unknown storage class!"); + case VarDecl::None: break; + case VarDecl::Extern: OS << "extern "; break; + case VarDecl::Static: OS << "static "; break; + case VarDecl::Auto: OS << "auto "; break; + case VarDecl::Register: OS << "register "; break; + } + } + + std::string Name = VD->getName(); + VD->getType().getAsStringInternal(Name); + OS << Name; + + // FIXME: Initializer for vardecl + } else { + // FIXME: "struct x;" + assert(0 && "Unexpected decl"); + } +} + + +void StmtPrinter::VisitNullStmt(NullStmt *Node) { + Indent() << ";\n"; +} + +void StmtPrinter::VisitDeclStmt(DeclStmt *Node) { + for (Decl *D = Node->getDecl(); D; D = D->getNextDeclarator()) { + Indent(); + PrintRawDecl(D); + OS << ";\n"; + } +} + +void StmtPrinter::VisitCompoundStmt(CompoundStmt *Node) { + Indent(); + PrintRawCompoundStmt(Node); + OS << "\n"; +} + +void StmtPrinter::VisitCaseStmt(CaseStmt *Node) { + Indent(-1) << "case "; + PrintExpr(Node->getLHS()); + if (Node->getRHS()) { + OS << " ... 
"; + PrintExpr(Node->getRHS()); + } + OS << ":\n"; + + PrintStmt(Node->getSubStmt(), 0); +} + +void StmtPrinter::VisitDefaultStmt(DefaultStmt *Node) { + Indent(-1) << "default:\n"; + PrintStmt(Node->getSubStmt(), 0); +} + +void StmtPrinter::VisitLabelStmt(LabelStmt *Node) { + Indent(-1) << Node->getName() << ":\n"; + PrintStmt(Node->getSubStmt(), 0); +} + +void StmtPrinter::PrintRawIfStmt(IfStmt *If) { + OS << "if "; + PrintExpr(If->getCond()); + + if (CompoundStmt *CS = dyn_cast<CompoundStmt>(If->getThen())) { + OS << ' '; + PrintRawCompoundStmt(CS); + OS << (If->getElse() ? ' ' : '\n'); + } else { + OS << '\n'; + PrintStmt(If->getThen()); + if (If->getElse()) Indent(); + } + + if (Stmt *Else = If->getElse()) { + OS << "else"; + + if (CompoundStmt *CS = dyn_cast<CompoundStmt>(Else)) { + OS << ' '; + PrintRawCompoundStmt(CS); + OS << '\n'; + } else if (IfStmt *ElseIf = dyn_cast<IfStmt>(Else)) { + OS << ' '; + PrintRawIfStmt(ElseIf); + } else { + OS << '\n'; + PrintStmt(If->getElse()); + } + } +} + +void StmtPrinter::VisitIfStmt(IfStmt *If) { + Indent(); + PrintRawIfStmt(If); +} + +void StmtPrinter::VisitSwitchStmt(SwitchStmt *Node) { + Indent() << "switch ("; + PrintExpr(Node->getCond()); + OS << ")"; + + // Pretty print compoundstmt bodies (very common). 
+ if (CompoundStmt *CS = dyn_cast<CompoundStmt>(Node->getBody())) { + OS << " "; + PrintRawCompoundStmt(CS); + OS << "\n"; + } else { + OS << "\n"; + PrintStmt(Node->getBody()); + } +} + +void StmtPrinter::VisitWhileStmt(WhileStmt *Node) { + Indent() << "while ("; + PrintExpr(Node->getCond()); + OS << ")\n"; + PrintStmt(Node->getBody()); +} + +void StmtPrinter::VisitDoStmt(DoStmt *Node) { + Indent() << "do\n"; + PrintStmt(Node->getBody()); + Indent() << "while "; + PrintExpr(Node->getCond()); + OS << ";\n"; +} + +void StmtPrinter::VisitForStmt(ForStmt *Node) { + Indent() << "for ("; + if (Node->getInit()) { + if (DeclStmt *DS = dyn_cast<DeclStmt>(Node->getInit())) + PrintRawDecl(DS->getDecl()); + else + PrintExpr(cast<Expr>(Node->getInit())); + } + OS << "; "; + if (Node->getCond()) + PrintExpr(Node->getCond()); + OS << "; "; + if (Node->getInc()) + PrintExpr(Node->getInc()); + OS << ")\n"; + PrintStmt(Node->getBody()); +} + +void StmtPrinter::VisitGotoStmt(GotoStmt *Node) { + Indent() << "goto " << Node->getLabel()->getName() << ";\n"; +} + +void StmtPrinter::VisitIndirectGotoStmt(IndirectGotoStmt *Node) { + Indent() << "goto *"; + PrintExpr(Node->getTarget()); + OS << ";\n"; +} + +void StmtPrinter::VisitContinueStmt(ContinueStmt *Node) { + Indent() << "continue;\n"; +} + +void StmtPrinter::VisitBreakStmt(BreakStmt *Node) { + Indent() << "break;\n"; +} + + +void StmtPrinter::VisitReturnStmt(ReturnStmt *Node) { + Indent() << "return"; + if (Node->getRetValue()) { + OS << " "; + PrintExpr(Node->getRetValue()); + } + OS << ";\n"; +} + +//===----------------------------------------------------------------------===// +// Expr printing methods. 
+//===----------------------------------------------------------------------===// + +void StmtPrinter::VisitExpr(Expr *Node) { + OS << "<<unknown expr type>>"; +} + +void StmtPrinter::VisitDeclRefExpr(DeclRefExpr *Node) { + OS << Node->getDecl()->getName(); +} + +void StmtPrinter::VisitCharacterLiteral(CharacterLiteral *Node) { + // FIXME: print value. + OS << "x"; +} + +void StmtPrinter::VisitIntegerLiteral(IntegerLiteral *Node) { + bool isSigned = Node->getType()->isSignedIntegerType(); + OS << Node->getValue().toString(10, isSigned); + + // Emit suffixes. Integer literals are always a builtin integer type. + switch (cast<BuiltinType>(Node->getType().getCanonicalType())->getKind()) { + default: assert(0 && "Unexpected type for integer literal!"); + case BuiltinType::Int: break; // no suffix. + case BuiltinType::UInt: OS << 'U'; break; + case BuiltinType::Long: OS << 'L'; break; + case BuiltinType::ULong: OS << "UL"; break; + case BuiltinType::LongLong: OS << "LL"; break; + case BuiltinType::ULongLong: OS << "ULL"; break; + } +} +void StmtPrinter::VisitFloatingLiteral(FloatingLiteral *Node) { + // FIXME: print value. + OS << "~1.0~"; +} +void StmtPrinter::VisitStringLiteral(StringLiteral *Str) { + if (Str->isWide()) OS << 'L'; + OS << '"'; + + // FIXME: this doesn't print wstrings right. + for (unsigned i = 0, e = Str->getByteLength(); i != e; ++i) { + switch (Str->getStrData()[i]) { + default: OS << Str->getStrData()[i]; break; + // Handle some common ones to make dumps prettier. 
+ case '\\': OS << "\\\\"; break; + case '"': OS << "\\\""; break; + case '\n': OS << "\\n"; break; + case '\t': OS << "\\t"; break; + case '\a': OS << "\\a"; break; + case '\b': OS << "\\b"; break; + } + } + OS << '"'; +} +void StmtPrinter::VisitParenExpr(ParenExpr *Node) { + OS << "("; + PrintExpr(Node->getSubExpr()); + OS << ")"; +} +void StmtPrinter::VisitUnaryOperator(UnaryOperator *Node) { + if (!Node->isPostfix()) + OS << UnaryOperator::getOpcodeStr(Node->getOpcode()); + PrintExpr(Node->getSubExpr()); + + if (Node->isPostfix()) + OS << UnaryOperator::getOpcodeStr(Node->getOpcode()); + +} +void StmtPrinter::VisitSizeOfAlignOfTypeExpr(SizeOfAlignOfTypeExpr *Node) { + OS << (Node->isSizeOf() ? "sizeof(" : "__alignof("); + OS << Node->getArgumentType().getAsString() << ")"; +} +void StmtPrinter::VisitArraySubscriptExpr(ArraySubscriptExpr *Node) { + PrintExpr(Node->getBase()); + OS << "["; + PrintExpr(Node->getIdx()); + OS << "]"; +} + +void StmtPrinter::VisitCallExpr(CallExpr *Call) { + PrintExpr(Call->getCallee()); + OS << "("; + for (unsigned i = 0, e = Call->getNumArgs(); i != e; ++i) { + if (i) OS << ", "; + PrintExpr(Call->getArg(i)); + } + OS << ")"; +} +void StmtPrinter::VisitMemberExpr(MemberExpr *Node) { + PrintExpr(Node->getBase()); + OS << (Node->isArrow() ? "->" : "."); + + FieldDecl *Field = Node->getMemberDecl(); + assert(Field && "MemberExpr should alway reference a field!"); + OS << Field->getName(); +} +void StmtPrinter::VisitCastExpr(CastExpr *Node) { + OS << "(" << Node->getDestType().getAsString() << ")"; + PrintExpr(Node->getSubExpr()); +} +void StmtPrinter::VisitBinaryOperator(BinaryOperator *Node) { + PrintExpr(Node->getLHS()); + OS << " " << BinaryOperator::getOpcodeStr(Node->getOpcode()) << " "; + PrintExpr(Node->getRHS()); +} +void StmtPrinter::VisitConditionalOperator(ConditionalOperator *Node) { + PrintExpr(Node->getCond()); + OS << " ? 
"; + PrintExpr(Node->getLHS()); + OS << " : "; + PrintExpr(Node->getRHS()); +} + +// GNU extensions. + +void StmtPrinter::VisitAddrLabel(AddrLabel *Node) { + OS << "&&" << Node->getLabel()->getName(); + +} + +// C++ + +void StmtPrinter::VisitCXXCastExpr(CXXCastExpr *Node) { + switch (Node->getOpcode()) { + default: + assert(0 && "Not a C++ cast expression"); + abort(); + case CXXCastExpr::ConstCast: OS << "const_cast<"; break; + case CXXCastExpr::DynamicCast: OS << "dynamic_cast<"; break; + case CXXCastExpr::ReinterpretCast: OS << "reinterpret_cast<"; break; + case CXXCastExpr::StaticCast: OS << "static_cast<"; break; + } + + OS << Node->getDestType().getAsString() << ">("; + PrintExpr(Node->getSubExpr()); + OS << ")"; +} + +void StmtPrinter::VisitCXXBoolLiteralExpr(CXXBoolLiteralExpr *Node) { + OS << (Node->getValue() ? "true" : "false"); +} + + +//===----------------------------------------------------------------------===// +// Stmt method implementations +//===----------------------------------------------------------------------===// + +void Stmt::dump() const { + // FIXME: eliminate use of <iostream> + print(std::cerr); +} + +void Stmt::print(std::ostream &OS) const { + if (this == 0) { + OS << "<NULL>"; + return; + } + + StmtPrinter P(OS); + const_cast<Stmt*>(this)->visit(P); +} diff --git a/AST/StmtVisitor.cpp b/AST/StmtVisitor.cpp new file mode 100644 index 0000000000..9171ef7566 --- /dev/null +++ b/AST/StmtVisitor.cpp @@ -0,0 +1,26 @@ +//===--- StmtVisitor.cpp - Visitor for Stmt subclasses --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the StmtVisitor class. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/AST/StmtVisitor.h" +#include "clang/AST/ExprCXX.h" +using namespace clang; + +StmtVisitor::~StmtVisitor() { + // Out-of-line virtual dtor. +} + +// Implement all of the delegation visitor methods. +#define STMT(N, FROM, TO) \ + void StmtVisitor::Visit##FROM(FROM *Node) { Visit##TO(Node); } +#include "clang/AST/StmtNodes.def" + diff --git a/AST/Type.cpp b/AST/Type.cpp new file mode 100644 index 0000000000..cd06c7e783 --- /dev/null +++ b/AST/Type.cpp @@ -0,0 +1,593 @@ +//===--- Type.cpp - Type representation and manipulation ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements type-related functionality. +// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/IdentifierTable.h" +#include "clang/AST/Type.h" +#include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/Basic/TargetInfo.h" +#include "llvm/Support/Streams.h" +#include "llvm/ADT/StringExtras.h" +using namespace clang; + +Type::~Type() {} + +/// isVoidType - Helper method to determine if this is the 'void' type. 
+bool Type::isVoidType() const { + if (const BuiltinType *BT = dyn_cast<BuiltinType>(CanonicalType)) + return BT->getKind() == BuiltinType::Void; + return false; +} + +bool Type::isObjectType() const { + if (isa<FunctionType>(CanonicalType)) + return false; + else if (CanonicalType->isIncompleteType()) + return false; + else + return true; +} + +bool Type::isDerivedType() const { + switch (CanonicalType->getTypeClass()) { + case Pointer: + case Array: + case FunctionProto: + case FunctionNoProto: + case Reference: + return true; + case Tagged: { + const TagType *TT = cast<TagType>(CanonicalType); + const Decl::Kind Kind = TT->getDecl()->getKind(); + return Kind == Decl::Struct || Kind == Decl::Union; + } + default: + return false; + } +} + +bool Type::isFunctionType() const { + return isa<FunctionType>(CanonicalType); +} + +bool Type::isPointerType() const { + return isa<PointerType>(CanonicalType); +} + +bool Type::isReferenceType() const { + return isa<ReferenceType>(CanonicalType); +} + +bool Type::isArrayType() const { + return isa<ArrayType>(CanonicalType); +} + +bool Type::isStructureType() const { + if (const TagType *TT = dyn_cast<TagType>(CanonicalType)) { + if (TT->getDecl()->getKind() == Decl::Struct) + return true; + } + return false; +} + +bool Type::isUnionType() const { + if (const TagType *TT = dyn_cast<TagType>(CanonicalType)) { + if (TT->getDecl()->getKind() == Decl::Union) + return true; + } + return false; +} + +// C99 6.2.7p1: If both are complete types, then the following additional +// requirements apply...FIXME (handle compatibility across source files). 
+bool Type::tagTypesAreCompatible(QualType lhs, QualType rhs) { + TagDecl *ldecl = cast<TagType>(lhs.getCanonicalType())->getDecl(); + TagDecl *rdecl = cast<TagType>(rhs.getCanonicalType())->getDecl(); + + if (ldecl->getKind() == Decl::Struct && rdecl->getKind() == Decl::Struct) { + if (ldecl->getIdentifier() == rdecl->getIdentifier()) + return true; + } + if (ldecl->getKind() == Decl::Union && rdecl->getKind() == Decl::Union) { + if (ldecl->getIdentifier() == rdecl->getIdentifier()) + return true; + } + return false; +} + +bool Type::pointerTypesAreCompatible(QualType lhs, QualType rhs) { + // C99 6.7.5.1p2: For two pointer types to be compatible, both shall be + // identically qualified and both shall be pointers to compatible types. + if (lhs.getQualifiers() != rhs.getQualifiers()) + return false; + + QualType ltype = cast<PointerType>(lhs.getCanonicalType())->getPointeeType(); + QualType rtype = cast<PointerType>(rhs.getCanonicalType())->getPointeeType(); + + return typesAreCompatible(ltype, rtype); +} + +// C++ 5.17p6: When the left opperand of an assignment operator denotes a +// reference to T, the operation assigns to the object of type T denoted by the +// reference. 
+bool Type::referenceTypesAreCompatible(QualType lhs, QualType rhs) { + QualType ltype = lhs; + + if (lhs->isReferenceType()) + ltype = cast<ReferenceType>(lhs.getCanonicalType())->getReferenceeType(); + + QualType rtype = rhs; + + if (rhs->isReferenceType()) + rtype = cast<ReferenceType>(rhs.getCanonicalType())->getReferenceeType(); + + return typesAreCompatible(ltype, rtype); +} + +bool Type::functionTypesAreCompatible(QualType lhs, QualType rhs) { + const FunctionType *lbase = cast<FunctionType>(lhs.getCanonicalType()); + const FunctionType *rbase = cast<FunctionType>(rhs.getCanonicalType()); + const FunctionTypeProto *lproto = dyn_cast<FunctionTypeProto>(lbase); + const FunctionTypeProto *rproto = dyn_cast<FunctionTypeProto>(rbase); + + // first check the return types (common between C99 and K&R). + if (!typesAreCompatible(lbase->getResultType(), rbase->getResultType())) + return false; + + if (lproto && rproto) { // two C99 style function prototypes + unsigned lproto_nargs = lproto->getNumArgs(); + unsigned rproto_nargs = rproto->getNumArgs(); + + if (lproto_nargs != rproto_nargs) + return false; + + // both prototypes have the same number of arguments. + if ((lproto->isVariadic() && !rproto->isVariadic()) || + (rproto->isVariadic() && !lproto->isVariadic())) + return false; + + // The use of ellipsis agree...now check the argument types. + for (unsigned i = 0; i < lproto_nargs; i++) + if (!typesAreCompatible(lproto->getArgType(i), rproto->getArgType(i))) + return false; + return true; + } + if (!lproto && !rproto) // two K&R style function decls, nothing to do. + return true; + + // we have a mixture of K&R style with C99 prototypes + const FunctionTypeProto *proto = lproto ? lproto : rproto; + + if (proto->isVariadic()) + return false; + + // FIXME: Each parameter type T in the prototype must be compatible with the + // type resulting from applying the usual argument conversions to T. 
+ return true; +} + +bool Type::arrayTypesAreCompatible(QualType lhs, QualType rhs) { + QualType ltype = cast<ArrayType>(lhs.getCanonicalType())->getElementType(); + QualType rtype = cast<ArrayType>(rhs.getCanonicalType())->getElementType(); + + if (!typesAreCompatible(ltype, rtype)) + return false; + + // FIXME: If both types specify constant sizes, then the sizes must also be + // the same. Even if the sizes are the same, GCC produces an error. + return true; +} + +/// typesAreCompatible - C99 6.7.3p9: For two qualified types to be compatible, +/// both shall have the identically qualified version of a compatible type. +/// C99 6.2.7p1: Two types have compatible types if their types are the +/// same. See 6.7.[2,3,5] for additional rules. +bool Type::typesAreCompatible(QualType lhs, QualType rhs) { + QualType lcanon = lhs.getCanonicalType(); + QualType rcanon = rhs.getCanonicalType(); + + // If two types are identical, they are are compatible + if (lcanon == rcanon) + return true; + + // If the canonical type classes don't match, they can't be compatible + if (lcanon->getTypeClass() != rcanon->getTypeClass()) + return false; + + switch (lcanon->getTypeClass()) { + case Type::Pointer: + return pointerTypesAreCompatible(lcanon, rcanon); + case Type::Reference: + return referenceTypesAreCompatible(lcanon, rcanon); + case Type::Array: + return arrayTypesAreCompatible(lcanon, rcanon); + case Type::FunctionNoProto: + case Type::FunctionProto: + return functionTypesAreCompatible(lcanon, rcanon); + case Type::Tagged: // handle structures, unions + return tagTypesAreCompatible(lcanon, rcanon); + case Type::Builtin: + return false; + default: + assert(0 && "unexpected type"); + } + return true; // should never get here... 
+} + +bool Type::isIntegerType() const { + if (const BuiltinType *BT = dyn_cast<BuiltinType>(CanonicalType)) + return BT->getKind() >= BuiltinType::Bool && + BT->getKind() <= BuiltinType::LongLong; + if (const TagType *TT = dyn_cast<TagType>(CanonicalType)) + if (TT->getDecl()->getKind() == Decl::Enum) + return true; + return false; +} + +bool Type::isSignedIntegerType() const { + if (const BuiltinType *BT = dyn_cast<BuiltinType>(CanonicalType)) { + return BT->getKind() >= BuiltinType::Char_S && + BT->getKind() <= BuiltinType::LongLong; + } + return false; +} + +bool Type::isUnsignedIntegerType() const { + if (const BuiltinType *BT = dyn_cast<BuiltinType>(CanonicalType)) { + return BT->getKind() >= BuiltinType::Bool && + BT->getKind() <= BuiltinType::ULongLong; + } + return false; +} + +bool Type::isFloatingType() const { + if (const BuiltinType *BT = dyn_cast<BuiltinType>(CanonicalType)) + return BT->getKind() >= BuiltinType::Float && + BT->getKind() <= BuiltinType::LongDouble; + if (const ComplexType *CT = dyn_cast<ComplexType>(CanonicalType)) + return CT->isFloatingType(); + return false; +} + +bool Type::isRealFloatingType() const { + if (const BuiltinType *BT = dyn_cast<BuiltinType>(CanonicalType)) + return BT->getKind() >= BuiltinType::Float && + BT->getKind() <= BuiltinType::LongDouble; + return false; +} + +bool Type::isRealType() const { + if (const BuiltinType *BT = dyn_cast<BuiltinType>(CanonicalType)) + return BT->getKind() >= BuiltinType::Bool && + BT->getKind() <= BuiltinType::LongDouble; + if (const TagType *TT = dyn_cast<TagType>(CanonicalType)) + return TT->getDecl()->getKind() == Decl::Enum; + return false; +} + +bool Type::isComplexType() const { + return isa<ComplexType>(CanonicalType); +} + +bool Type::isVectorType() const { + return isa<VectorType>(CanonicalType); +} + +bool Type::isArithmeticType() const { + if (const BuiltinType *BT = dyn_cast<BuiltinType>(CanonicalType)) + return BT->getKind() != BuiltinType::Void; + if (const TagType *TT = 
dyn_cast<TagType>(CanonicalType)) + if (TT->getDecl()->getKind() == Decl::Enum) + return true; + return isa<ComplexType>(CanonicalType) || isa<VectorType>(CanonicalType); +} + +bool Type::isScalarType() const { + if (const BuiltinType *BT = dyn_cast<BuiltinType>(CanonicalType)) + return BT->getKind() != BuiltinType::Void; + if (const TagType *TT = dyn_cast<TagType>(CanonicalType)) { + if (TT->getDecl()->getKind() == Decl::Enum) + return true; + return false; + } + return isa<PointerType>(CanonicalType) || isa<ComplexType>(CanonicalType); +} + +bool Type::isAggregateType() const { + if (const TagType *TT = dyn_cast<TagType>(CanonicalType)) { + if (TT->getDecl()->getKind() == Decl::Struct) + return true; + return false; + } + return CanonicalType->getTypeClass() == Array; +} + +// The only variable size types are auto arrays within a function. Structures +// cannot contain a VLA member. They can have a flexible array member, however +// the structure is still constant size (C99 6.7.2.1p16). +bool Type::isConstantSizeType(SourceLocation *loc) const { + if (const ArrayType *Ary = dyn_cast<ArrayType>(CanonicalType)) { + assert(Ary->getSize() && "Incomplete types don't have a size at all!"); + return Ary->getSize()->isIntegerConstantExpr(loc); // Variable Length Array? + } + return true; +} + +/// isIncompleteType - Return true if this is an incomplete type (C99 6.2.5p1) +/// - a type that can describe objects, but which lacks information needed to +/// determine its size. +bool Type::isIncompleteType() const { + switch (CanonicalType->getTypeClass()) { + default: return false; + case Builtin: + // Void is the only incomplete builtin type. Per C99 6.2.5p19, it can never + // be completed. + return isVoidType(); + case Tagged: + // A tagged type (struct/union/enum/class) is incomplete if the decl is a + // forward declaration, but not a full definition (C99 6.2.5p22). 
+ return !cast<TagType>(CanonicalType)->getDecl()->isDefinition(); + case Array: + // An array of unknown size is an incomplete type (C99 6.2.5p22). + return cast<ArrayType>(CanonicalType)->getSize() == 0; + } +} + +bool Type::isPromotableIntegerType() const { + const BuiltinType *BT = dyn_cast<BuiltinType>(CanonicalType); + if (!BT) return false; + switch (BT->getKind()) { + case BuiltinType::Bool: + case BuiltinType::Char_S: + case BuiltinType::Char_U: + case BuiltinType::SChar: + case BuiltinType::UChar: + case BuiltinType::Short: + case BuiltinType::UShort: + return true; + default: + return false; + } +} + +const char *BuiltinType::getName() const { + switch (getKind()) { + default: assert(0 && "Unknown builtin type!"); + case Void: return "void"; + case Bool: return "_Bool"; + case Char_S: return "char"; + case Char_U: return "char"; + case SChar: return "signed char"; + case Short: return "short"; + case Int: return "int"; + case Long: return "long"; + case LongLong: return "long long"; + case UChar: return "unsigned char"; + case UShort: return "unsigned short"; + case UInt: return "unsigned int"; + case ULong: return "unsigned long"; + case ULongLong: return "unsigned long long"; + case Float: return "float"; + case Double: return "double"; + case LongDouble: return "long double"; + } +} + +// FIXME: need to use TargetInfo to derive the target specific sizes. This +// implementation will suffice for play with vector support. 
+unsigned BuiltinType::getSize() const { + switch (getKind()) { + default: assert(0 && "Unknown builtin type!"); + case Void: return 0; + case Bool: + case Char_S: + case Char_U: return sizeof(char) * 8; + case SChar: return sizeof(signed char) * 8; + case Short: return sizeof(short) * 8; + case Int: return sizeof(int) * 8; + case Long: return sizeof(long) * 8; + case LongLong: return sizeof(long long) * 8; + case UChar: return sizeof(unsigned char) * 8; + case UShort: return sizeof(unsigned short) * 8; + case UInt: return sizeof(unsigned int) * 8; + case ULong: return sizeof(unsigned long) * 8; + case ULongLong: return sizeof(unsigned long long) * 8; + case Float: return sizeof(float) * 8; + case Double: return sizeof(double) * 8; + case LongDouble: return sizeof(long double) * 8; + } +} + +void FunctionTypeProto::Profile(llvm::FoldingSetNodeID &ID, QualType Result, + QualType* ArgTys, + unsigned NumArgs, bool isVariadic) { + ID.AddPointer(Result.getAsOpaquePtr()); + for (unsigned i = 0; i != NumArgs; ++i) + ID.AddPointer(ArgTys[i].getAsOpaquePtr()); + ID.AddInteger(isVariadic); +} + +void FunctionTypeProto::Profile(llvm::FoldingSetNodeID &ID) { + Profile(ID, getResultType(), ArgInfo, NumArgs, isVariadic()); +} + + +bool RecordType::classof(const Type *T) { + if (const TagType *TT = dyn_cast<TagType>(T)) + return isa<RecordDecl>(TT->getDecl()); + return false; +} + + +//===----------------------------------------------------------------------===// +// Type Printing +//===----------------------------------------------------------------------===// + +void QualType::dump(const char *msg) const { + std::string R = "foo"; + getAsStringInternal(R); + if (msg) + fprintf(stderr, "%s: %s\n", msg, R.c_str()); + else + fprintf(stderr, "%s\n", R.c_str()); +} + +static void AppendTypeQualList(std::string &S, unsigned TypeQuals) { + // Note: funkiness to ensure we get a space only between quals. 
+ bool NonePrinted = true; + if (TypeQuals & QualType::Const) + S += "const", NonePrinted = false; + if (TypeQuals & QualType::Volatile) + S += (NonePrinted+" volatile"), NonePrinted = false; + if (TypeQuals & QualType::Restrict) + S += (NonePrinted+" restrict"), NonePrinted = false; +} + +void QualType::getAsStringInternal(std::string &S) const { + if (isNull()) { + S += "NULL TYPE\n"; + return; + } + + // Print qualifiers as appropriate. + if (unsigned TQ = getQualifiers()) { + std::string TQS; + AppendTypeQualList(TQS, TQ); + if (!S.empty()) + S = TQS + ' ' + S; + else + S = TQS; + } + + getTypePtr()->getAsStringInternal(S); +} + +void BuiltinType::getAsStringInternal(std::string &S) const { + if (S.empty()) { + S = getName(); + } else { + // Prefix the basic type, e.g. 'int X'. + S = ' ' + S; + S = getName() + S; + } +} + +void ComplexType::getAsStringInternal(std::string &S) const { + ElementType->getAsStringInternal(S); + S = "_Complex " + S; +} + +void PointerType::getAsStringInternal(std::string &S) const { + S = '*' + S; + + // Handle things like 'int (*A)[4];' correctly. + // FIXME: this should include vectors, but vectors use attributes I guess. + if (isa<ArrayType>(PointeeType.getTypePtr())) + S = '(' + S + ')'; + + PointeeType.getAsStringInternal(S); +} + +void ReferenceType::getAsStringInternal(std::string &S) const { + S = '&' + S; + + // Handle things like 'int (&A)[4];' correctly. + // FIXME: this should include vectors, but vectors use attributes I guess. 
+ if (isa<ArrayType>(ReferenceeType.getTypePtr())) + S = '(' + S + ')'; + + ReferenceeType.getAsStringInternal(S); +} + +void ArrayType::getAsStringInternal(std::string &S) const { + S += '['; + + if (IndexTypeQuals) { + AppendTypeQualList(S, IndexTypeQuals); + S += ' '; + } + + if (SizeModifier == Static) + S += "static"; + else if (SizeModifier == Star) + S += '*'; + + S += ']'; + + ElementType.getAsStringInternal(S); +} + +void VectorType::getAsStringInternal(std::string &S) const { + S += " __attribute__(( vector_size("; + // FIXME: handle types that are != 32 bits. + S += llvm::utostr_32(NumElements*4); // convert back to bytes. + S += ") ))"; + ElementType.getAsStringInternal(S); +} + +void FunctionTypeNoProto::getAsStringInternal(std::string &S) const { + // If needed for precedence reasons, wrap the inner part in grouping parens. + if (!S.empty()) + S = "(" + S + ")"; + + S += "()"; + getResultType().getAsStringInternal(S); +} + +void FunctionTypeProto::getAsStringInternal(std::string &S) const { + // If needed for precedence reasons, wrap the inner part in grouping parens. + if (!S.empty()) + S = "(" + S + ")"; + + S += "("; + std::string Tmp; + for (unsigned i = 0, e = getNumArgs(); i != e; ++i) { + if (i) S += ", "; + getArgType(i).getAsStringInternal(Tmp); + S += Tmp; + Tmp.clear(); + } + + if (isVariadic()) { + if (getNumArgs()) + S += ", "; + S += "..."; + } else if (getNumArgs() == 0) { + // Do not emit int() if we have a proto, emit 'int(void)'. + S += "void"; + } + + S += ")"; + getResultType().getAsStringInternal(S); +} + + +void TypedefType::getAsStringInternal(std::string &InnerString) const { + if (!InnerString.empty()) // Prefix the basic type, e.g. 'typedefname X'. + InnerString = ' ' + InnerString; + InnerString = getDecl()->getIdentifier()->getName() + InnerString; +} + +void TagType::getAsStringInternal(std::string &InnerString) const { + if (!InnerString.empty()) // Prefix the basic type, e.g. 'typedefname X'. 
+ InnerString = ' ' + InnerString; + + const char *Kind = getDecl()->getKindName(); + const char *ID; + if (const IdentifierInfo *II = getDecl()->getIdentifier()) + ID = II->getName(); + else + ID = "<anonymous>"; + + InnerString = std::string(Kind) + " " + ID + InnerString; +} diff --git a/Basic/Diagnostic.cpp b/Basic/Diagnostic.cpp new file mode 100644 index 0000000000..155b6fca12 --- /dev/null +++ b/Basic/Diagnostic.cpp @@ -0,0 +1,147 @@ +//===--- Diagnostic.cpp - C Language Family Diagnostic Handling -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Diagnostic-related interfaces. +// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/SourceLocation.h" +#include <cassert> +using namespace clang; + +/// Flag values for diagnostics. +enum { + // Diagnostic classes. + NOTE = 0x01, + WARNING = 0x02, + EXTENSION = 0x03, + ERROR = 0x04, + FATAL = 0x05, + class_mask = 0x07 +}; + +/// DiagnosticFlags - A set of flags, or'd together, that describe the +/// diagnostic. +static unsigned char DiagnosticFlags[] = { +#define DIAG(ENUM,FLAGS,DESC) FLAGS, +#include "clang/Basic/DiagnosticKinds.def" + 0 +}; + +/// getDiagClass - Return the class field of the diagnostic. +/// +static unsigned getDiagClass(unsigned DiagID) { + assert(DiagID < diag::NUM_DIAGNOSTICS && "Diagnostic ID out of range!"); + return DiagnosticFlags[DiagID] & class_mask; +} + +/// DiagnosticText - An english message to print for the diagnostic. These +/// should be localized. 
+static const char * const DiagnosticText[] = { +#define DIAG(ENUM,FLAGS,DESC) DESC, +#include "clang/Basic/DiagnosticKinds.def" + 0 +}; + +Diagnostic::Diagnostic(DiagnosticClient &client) : Client(client) { + WarningsAsErrors = false; + WarnOnExtensions = false; + ErrorOnExtensions = false; + // Clear all mappings, setting them to MAP_DEFAULT. + memset(DiagMappings, 0, sizeof(DiagMappings)); + + ErrorOccurred = false; + NumDiagnostics = 0; + NumErrors = 0; +} + +/// isNoteWarningOrExtension - Return true if the unmapped diagnostic level of +/// the specified diagnostic ID is a Note, Warning, or Extension. +bool Diagnostic::isNoteWarningOrExtension(unsigned DiagID) { + return getDiagClass(DiagID) < ERROR; +} + + +/// getDescription - Given a diagnostic ID, return a description of the +/// issue. +const char *Diagnostic::getDescription(unsigned DiagID) { + assert(DiagID < diag::NUM_DIAGNOSTICS && "Diagnostic ID out of range!"); + return DiagnosticText[DiagID]; +} + +/// getDiagnosticLevel - Based on the way the client configured the Diagnostic +/// object, classify the specified diagnostic ID into a Level, consumable by +/// the DiagnosticClient. +Diagnostic::Level Diagnostic::getDiagnosticLevel(unsigned DiagID) const { + unsigned DiagClass = getDiagClass(DiagID); + + // Specific non-error diagnostics may be mapped to various levels from ignored + // to error. + if (DiagClass < ERROR) { + switch (getDiagnosticMapping((diag::kind)DiagID)) { + case diag::MAP_DEFAULT: break; + case diag::MAP_IGNORE: return Ignored; + case diag::MAP_WARNING: DiagClass = WARNING; break; + case diag::MAP_ERROR: DiagClass = ERROR; break; + } + } + + // Map diagnostic classes based on command line argument settings. + if (DiagClass == EXTENSION) { + if (ErrorOnExtensions) + DiagClass = ERROR; + else if (WarnOnExtensions) + DiagClass = WARNING; + else + return Ignored; + } + + // If warnings are to be treated as errors, indicate this as such. 
+ if (DiagClass == WARNING && WarningsAsErrors) + DiagClass = ERROR; + + switch (DiagClass) { + default: assert(0 && "Unknown diagnostic class!"); + case NOTE: return Diagnostic::Note; + case WARNING: return Diagnostic::Warning; + case ERROR: return Diagnostic::Error; + case FATAL: return Diagnostic::Fatal; + } +} + +/// Report - Issue the message to the client. If the client wants us to stop +/// compilation, return true, otherwise return false. DiagID is a member of +/// the diag::kind enum. +void Diagnostic::Report(SourceLocation Pos, unsigned DiagID, + const std::string *Strs, unsigned NumStrs, + const SourceRange *Ranges, unsigned NumRanges) { + // Figure out the diagnostic level of this message. + Diagnostic::Level DiagLevel = getDiagnosticLevel(DiagID); + + // If the client doesn't care about this message, don't issue it. + if (DiagLevel == Diagnostic::Ignored) + return; + + if (DiagLevel >= Diagnostic::Error) { + ErrorOccurred = true; + ++NumErrors; + } + + // Are we going to ignore this diagnosic? + if (Client.IgnoreDiagnostic(DiagLevel, Pos)) + return; + + // Finally, report it. + Client.HandleDiagnostic(DiagLevel, Pos, (diag::kind)DiagID, Strs, NumStrs, + Ranges, NumRanges); + ++NumDiagnostics; +} + +DiagnosticClient::~DiagnosticClient() {} diff --git a/Basic/FileManager.cpp b/Basic/FileManager.cpp new file mode 100644 index 0000000000..9886e032b4 --- /dev/null +++ b/Basic/FileManager.cpp @@ -0,0 +1,169 @@ +//===--- FileManager.cpp - File System Probing and Caching ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the FileManager interface. 
+// +//===----------------------------------------------------------------------===// +// +// TODO: This should index all interesting directories with dirent calls. +// getdirentries ? +// opendir/readdir_r/closedir ? +// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/FileManager.h" +#include "llvm/ADT/SmallString.h" +#include <iostream> +using namespace clang; + +// FIXME: Enhance libsystem to support inode and other fields. +#include <sys/stat.h> + + +/// NON_EXISTANT_DIR - A special value distinct from null that is used to +/// represent a dir name that doesn't exist on the disk. +#define NON_EXISTANT_DIR reinterpret_cast<DirectoryEntry*>((intptr_t)-1) + +/// getDirectory - Lookup, cache, and verify the specified directory. This +/// returns null if the directory doesn't exist. +/// +const DirectoryEntry *FileManager::getDirectory(const char *NameStart, + const char *NameEnd) { + ++NumDirLookups; + llvm::StringMapEntry<DirectoryEntry *> &NamedDirEnt = + DirEntries.GetOrCreateValue(NameStart, NameEnd); + + // See if there is already an entry in the map. + if (NamedDirEnt.getValue()) + return NamedDirEnt.getValue() == NON_EXISTANT_DIR + ? 0 : NamedDirEnt.getValue(); + + ++NumDirCacheMisses; + + // By default, initialize it to invalid. + NamedDirEnt.setValue(NON_EXISTANT_DIR); + + // Get the null-terminated directory name as stored as the key of the + // DirEntries map. + const char *InterndDirName = NamedDirEnt.getKeyData(); + + // Check to see if the directory exists. + struct stat StatBuf; + if (stat(InterndDirName, &StatBuf) || // Error stat'ing. + !S_ISDIR(StatBuf.st_mode)) // Not a directory? + return 0; + + // It exists. See if we have already opened a directory with the same inode. + // This occurs when one dir is symlinked to another, for example. 
+ DirectoryEntry &UDE = + UniqueDirs[std::make_pair(StatBuf.st_dev, StatBuf.st_ino)]; + + NamedDirEnt.setValue(&UDE); + if (UDE.getName()) // Already have an entry with this inode, return it. + return &UDE; + + // Otherwise, we don't have this directory yet, add it. We use the string + // key from the DirEntries map as the string. + UDE.Name = InterndDirName; + return &UDE; +} + +/// NON_EXISTANT_FILE - A special value distinct from null that is used to +/// represent a filename that doesn't exist on the disk. +#define NON_EXISTANT_FILE reinterpret_cast<FileEntry*>((intptr_t)-1) + +/// getFile - Lookup, cache, and verify the specified file. This returns null +/// if the file doesn't exist. +/// +const FileEntry *FileManager::getFile(const char *NameStart, + const char *NameEnd) { + ++NumFileLookups; + + // See if there is already an entry in the map. + llvm::StringMapEntry<FileEntry *> &NamedFileEnt = + FileEntries.GetOrCreateValue(NameStart, NameEnd); + + // See if there is already an entry in the map. + if (NamedFileEnt.getValue()) + return NamedFileEnt.getValue() == NON_EXISTANT_FILE + ? 0 : NamedFileEnt.getValue(); + + ++NumFileCacheMisses; + + // By default, initialize it to invalid. + NamedFileEnt.setValue(NON_EXISTANT_FILE); + + // Figure out what directory it is in. If the string contains a / in it, + // strip off everything after it. + // FIXME: this logic should be in sys::Path. + const char *SlashPos = NameEnd-1; + while (SlashPos >= NameStart && SlashPos[0] != '/') + --SlashPos; + + const DirectoryEntry *DirInfo; + if (SlashPos < NameStart) { + // Use the current directory if file has no path component. + const char *Name = "."; + DirInfo = getDirectory(Name, Name+1); + } else if (SlashPos == NameEnd-1) + return 0; // If filename ends with a /, it's a directory. + else + DirInfo = getDirectory(NameStart, SlashPos); + + if (DirInfo == 0) // Directory doesn't exist, file can't exist. 
+ return 0; + + // Get the null-terminated file name as stored as the key of the + // FileEntries map. + const char *InterndFileName = NamedFileEnt.getKeyData(); + + // FIXME: Use the directory info to prune this, before doing the stat syscall. + // FIXME: This will reduce the # syscalls. + + // Nope, there isn't. Check to see if the file exists. + struct stat StatBuf; + //std::cerr << "STATING: " << Filename; + if (stat(InterndFileName, &StatBuf) || // Error stat'ing. + S_ISDIR(StatBuf.st_mode)) { // A directory? + // If this file doesn't exist, we leave a null in FileEntries for this path. + //std::cerr << ": Not existing\n"; + return 0; + } + //std::cerr << ": exists\n"; + + // It exists. See if we have already opened a directory with the same inode. + // This occurs when one dir is symlinked to another, for example. + FileEntry &UFE = UniqueFiles[std::make_pair(StatBuf.st_dev, StatBuf.st_ino)]; + + NamedFileEnt.setValue(&UFE); + if (UFE.getName()) // Already have an entry with this inode, return it. + return &UFE; + + // Otherwise, we don't have this directory yet, add it. + // FIXME: Change the name to be a char* that points back to the 'FileEntries' + // key. 
+ UFE.Name = InterndFileName; + UFE.Size = StatBuf.st_size; + UFE.ModTime = StatBuf.st_mtime; + UFE.Dir = DirInfo; + UFE.UID = NextFileUID++; + return &UFE; +} + +void FileManager::PrintStats() const { + std::cerr << "\n*** File Manager Stats:\n"; + std::cerr << UniqueFiles.size() << " files found, " + << UniqueDirs.size() << " dirs found.\n"; + std::cerr << NumDirLookups << " dir lookups, " + << NumDirCacheMisses << " dir cache misses.\n"; + std::cerr << NumFileLookups << " file lookups, " + << NumFileCacheMisses << " file cache misses.\n"; + + //std::cerr << PagesMapped << BytesOfPagesMapped << FSLookups; +} diff --git a/Basic/Makefile b/Basic/Makefile new file mode 100644 index 0000000000..1db0a7f19b --- /dev/null +++ b/Basic/Makefile @@ -0,0 +1,22 @@ +##===- clang/Basic/Makefile --------------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file was developed by Chris Lattner and is distributed under +# the University of Illinois Open Source License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +# +# This implements the Basic library for the C-Language front-end. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME := clangBasic +BUILD_ARCHIVE = 1 +CXXFLAGS = -fno-rtti + +CPPFLAGS += -I$(PROJ_SRC_DIR)/../include + +include $(LEVEL)/Makefile.common + diff --git a/Basic/SourceManager.cpp b/Basic/SourceManager.cpp new file mode 100644 index 0000000000..f6148c1602 --- /dev/null +++ b/Basic/SourceManager.cpp @@ -0,0 +1,370 @@ +//===--- SourceManager.cpp - Track and cache source files -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements the SourceManager interface. +// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/FileManager.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/System/Path.h" +#include <algorithm> +#include <iostream> +using namespace clang; +using namespace SrcMgr; +using llvm::MemoryBuffer; + +SourceManager::~SourceManager() { + for (std::map<const FileEntry *, FileInfo>::iterator I = FileInfos.begin(), + E = FileInfos.end(); I != E; ++I) { + delete I->second.Buffer; + delete[] I->second.SourceLineCache; + } + + for (std::list<InfoRec>::iterator I = MemBufferInfos.begin(), + E = MemBufferInfos.end(); I != E; ++I) { + delete I->second.Buffer; + delete[] I->second.SourceLineCache; + } +} + + +// FIXME: REMOVE THESE +#include <unistd.h> +#include <sys/types.h> +#include <sys/uio.h> +#include <sys/fcntl.h> +#include <cerrno> + +static const MemoryBuffer *ReadFileFast(const FileEntry *FileEnt) { +#if 0 + // FIXME: Reintroduce this and zap this function once the common llvm stuff + // is fast for the small case. + return MemoryBuffer::getFile(FileEnt->getName(), strlen(FileEnt->getName()), + FileEnt->getSize()); +#endif + + // If the file is larger than some threshold, use 'read', otherwise use mmap. 
+ if (FileEnt->getSize() >= 4096*4) + return MemoryBuffer::getFile(FileEnt->getName(), strlen(FileEnt->getName()), + 0, FileEnt->getSize()); + + MemoryBuffer *SB = MemoryBuffer::getNewUninitMemBuffer(FileEnt->getSize(), + FileEnt->getName()); + char *BufPtr = const_cast<char*>(SB->getBufferStart()); + + int FD = ::open(FileEnt->getName(), O_RDONLY); + if (FD == -1) { + delete SB; + return 0; + } + + unsigned BytesLeft = FileEnt->getSize(); + while (BytesLeft) { + ssize_t NumRead = ::read(FD, BufPtr, BytesLeft); + if (NumRead != -1) { + BytesLeft -= NumRead; + BufPtr += NumRead; + } else if (errno == EINTR) { + // try again + } else { + // error reading. + close(FD); + delete SB; + return 0; + } + } + close(FD); + + return SB; +} + + +/// getFileInfo - Create or return a cached FileInfo for the specified file. +/// +const InfoRec * +SourceManager::getInfoRec(const FileEntry *FileEnt) { + assert(FileEnt && "Didn't specify a file entry to use?"); + // Do we already have information about this file? + std::map<const FileEntry *, FileInfo>::iterator I = + FileInfos.lower_bound(FileEnt); + if (I != FileInfos.end() && I->first == FileEnt) + return &*I; + + // Nope, get information. + const MemoryBuffer *File = ReadFileFast(FileEnt); + if (File == 0) + return 0; + + const InfoRec &Entry = + *FileInfos.insert(I, std::make_pair(FileEnt, FileInfo())); + FileInfo &Info = const_cast<FileInfo &>(Entry.second); + + Info.Buffer = File; + Info.SourceLineCache = 0; + Info.NumLines = 0; + return &Entry; +} + + +/// createMemBufferInfoRec - Create a new info record for the specified memory +/// buffer. This does no caching. +const InfoRec * +SourceManager::createMemBufferInfoRec(const MemoryBuffer *Buffer) { + // Add a new info record to the MemBufferInfos list and return it. 
+ FileInfo FI; + FI.Buffer = Buffer; + FI.SourceLineCache = 0; + FI.NumLines = 0; + MemBufferInfos.push_back(InfoRec(0, FI)); + return &MemBufferInfos.back(); +} + + +/// createFileID - Create a new fileID for the specified InfoRec and include +/// position. This works regardless of whether the InfoRec corresponds to a +/// file or some other input source. +unsigned SourceManager::createFileID(const InfoRec *File, + SourceLocation IncludePos) { + // If FileEnt is really large (e.g. it's a large .i file), we may not be able + // to fit an arbitrary position in the file in the FilePos field. To handle + // this, we create one FileID for each chunk of the file that fits in a + // FilePos field. + unsigned FileSize = File->second.Buffer->getBufferSize(); + if (FileSize+1 < (1 << SourceLocation::FilePosBits)) { + FileIDs.push_back(FileIDInfo::getNormalBuffer(IncludePos, 0, File)); + assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) && + "Ran out of file ID's!"); + return FileIDs.size(); + } + + // Create one FileID for each chunk of the file. + unsigned Result = FileIDs.size()+1; + + unsigned ChunkNo = 0; + while (1) { + FileIDs.push_back(FileIDInfo::getNormalBuffer(IncludePos, ChunkNo++, File)); + + if (FileSize+1 < (1 << SourceLocation::FilePosBits)) break; + FileSize -= (1 << SourceLocation::FilePosBits); + } + + assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) && + "Ran out of file ID's!"); + return Result; +} + +/// getInstantiationLoc - Return a new SourceLocation that encodes the fact +/// that a token from physloc PhysLoc should actually be referenced from +/// InstantiationLoc. +SourceLocation SourceManager::getInstantiationLoc(SourceLocation PhysLoc, + SourceLocation InstantLoc) { + assert(getFIDInfo(PhysLoc.getFileID())->IDType != + SrcMgr::FileIDInfo::MacroExpansion && + "Location instantiated in a macro?"); + + // Resolve InstantLoc down to a real logical location. 
+ InstantLoc = getLogicalLoc(InstantLoc); + + unsigned InstantiationFileID; + // If this is the same instantiation as was requested last time, return this + // immediately. + if (PhysLoc.getFileID() == LastInstantiationLoc_MacroFID && + InstantLoc == LastInstantiationLoc_InstantLoc) { + InstantiationFileID = LastInstantiationLoc_Result; + } else { + // Add a FileID for this. FIXME: should cache these! + FileIDs.push_back(FileIDInfo::getMacroExpansion(InstantLoc, + PhysLoc.getFileID())); + InstantiationFileID = FileIDs.size(); + + // Remember this in the single-entry cache for next time. + LastInstantiationLoc_MacroFID = PhysLoc.getFileID(); + LastInstantiationLoc_InstantLoc = InstantLoc; + LastInstantiationLoc_Result = InstantiationFileID; + } + return SourceLocation(InstantiationFileID, PhysLoc.getRawFilePos()); +} + + + +/// getCharacterData - Return a pointer to the start of the specified location +/// in the appropriate MemoryBuffer. +const char *SourceManager::getCharacterData(SourceLocation SL) const { + // Note that this is a hot function in the getSpelling() path, which is + // heavily used by -E mode. + unsigned FileID = SL.getFileID(); + assert(FileID && "Invalid source location!"); + + return getFileInfo(FileID)->Buffer->getBufferStart() + getFilePos(SL); +} + +/// getIncludeLoc - Return the location of the #include for the specified +/// FileID. +SourceLocation SourceManager::getIncludeLoc(unsigned FileID) const { + const SrcMgr::FileIDInfo *FIDInfo = getFIDInfo(FileID); + + // For Macros, the physical loc is specified by the MacroTokenFileID. + if (FIDInfo->IDType == SrcMgr::FileIDInfo::MacroExpansion) + FIDInfo = &FileIDs[FIDInfo->u.MacroTokenFileID-1]; + + return FIDInfo->IncludeLoc; +} + + +/// getColumnNumber - Return the column # for the specified include position. +/// this is significantly cheaper to compute than the line number. This returns +/// zero if the column number isn't known. 
unsigned SourceManager::getColumnNumber(SourceLocation Loc) const {
  Loc = getLogicalLoc(Loc);
  unsigned FileID = Loc.getFileID();
  // A zero FileID means there is no usable location: report column 0.
  if (FileID == 0) return 0;

  unsigned FilePos = getFilePos(Loc);
  const MemoryBuffer *Buffer = getBuffer(FileID);
  const char *Buf = Buffer->getBufferStart();

  // Scan backwards to the character just after the previous newline (or to the
  // start of the buffer); the distance from there is the 1-based column.
  unsigned LineStart = FilePos;
  while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
    --LineStart;
  return FilePos-LineStart+1;
}

/// getSourceName - This method returns the name of the file or buffer that
/// the SourceLocation specifies.  This can be modified with #line directives,
/// etc.
std::string SourceManager::getSourceName(SourceLocation Loc) {
  Loc = getLogicalLoc(Loc);
  unsigned FileID = Loc.getFileID();
  // A zero FileID means there is no usable location: return an empty name.
  if (FileID == 0) return "";
  return getFileInfo(FileID)->Buffer->getBufferIdentifier();
}


/// getLineNumber - Given a SourceLocation, return the physical line number
/// for the position indicated.  This requires building and caching a table of
/// line offsets for the MemoryBuffer, so this is not cheap: use only when
/// about to emit a diagnostic.  Returns 0 if the location has a zero FileID.
unsigned SourceManager::getLineNumber(SourceLocation Loc) {
  Loc = getLogicalLoc(Loc);
  unsigned FileID = Loc.getFileID();
  if (FileID == 0) return 0;
  // Note: this local deliberately-or-not shares its name with the FileInfo
  // type; inside this function 'FileInfo' refers to the pointer.
  FileInfo *FileInfo = getFileInfo(FileID);

  // If this is the first use of line information for this buffer, compute the
  // SourceLineCache for it on demand.
  if (FileInfo->SourceLineCache == 0) {
    const MemoryBuffer *Buffer = FileInfo->Buffer;

    // Find the file offsets of all of the *physical* source lines.  This does
    // not look at trigraphs, escaped newlines, or anything else tricky.
    std::vector<unsigned> LineOffsets;

    // Line #1 starts at char 0.
    LineOffsets.push_back(0);

    const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
    const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
    unsigned Offs = 0;
    while (1) {
      // Skip over the contents of the line.
      // TODO: Vectorize this?  This is very performance sensitive for programs
      // with lots of diagnostics and in -E mode.
      // The scan stops on '\0' as well as on newlines; it relies on a null
      // byte being readable at End (see the Buf == End check below).
      const unsigned char *NextBuf = (const unsigned char *)Buf;
      while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
        ++NextBuf;
      Offs += NextBuf-Buf;
      Buf = NextBuf;

      if (Buf[0] == '\n' || Buf[0] == '\r') {
        // If this is \n\r or \r\n, skip both characters.
        if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
          ++Offs, ++Buf;
        ++Offs, ++Buf;
        // The character after the newline sequence starts a new line.
        LineOffsets.push_back(Offs);
      } else {
        // Otherwise, this is a null.  If end of file, exit.
        if (Buf == End) break;
        // Otherwise, skip the null.
        ++Offs, ++Buf;
      }
    }
    // Record the end-of-buffer offset as a final entry in the table.
    LineOffsets.push_back(Offs);

    // Copy the offsets into the FileInfo structure.
    FileInfo->NumLines = LineOffsets.size();
    FileInfo->SourceLineCache = new unsigned[LineOffsets.size()];
    std::copy(LineOffsets.begin(), LineOffsets.end(),
              FileInfo->SourceLineCache);
  }

  // Okay, we know we have a line number table.  Do a binary search to find the
  // line number that this character position lands on.
  unsigned NumLines = FileInfo->NumLines;
  unsigned *SourceLineCache = FileInfo->SourceLineCache;

  // TODO: If this is performance sensitive, we could try doing simple radix
  // type approaches to make good (tight?) initial guesses based on the
  // assumption that all lines are the same average size.
  // Searching for FilePos+1 yields the first line start strictly greater than
  // FilePos; its index equals the number of line starts <= FilePos, which is
  // the 1-based line number.
  unsigned *Pos = std::lower_bound(SourceLineCache, SourceLineCache+NumLines,
                                   getFilePos(Loc)+1);
  return Pos-SourceLineCache;
}

/// getSourceFilePos - This method returns the *logical* offset from the start
/// of the file that the specified SourceLocation represents.
/// This returns the location of the *logical* character data, not the
/// physical file position.  In the case of macros, for example, this returns
/// where the macro was instantiated, not where the characters for the macro
/// can be found.
unsigned SourceManager::getSourceFilePos(SourceLocation Loc) const {

  // If this is a macro, we need to get the instantiation location.
  // Keep following IncludeLoc until we land on a non-macro FileID.
  const SrcMgr::FileIDInfo *FIDInfo = getFIDInfo(Loc.getFileID());
  while (FIDInfo->IDType == SrcMgr::FileIDInfo::MacroExpansion) {
    Loc = FIDInfo->IncludeLoc;
    FIDInfo = getFIDInfo(Loc.getFileID());
  }

  return getFilePos(Loc);
}


/// PrintStats - Print statistics to stderr.
///
void SourceManager::PrintStats() const {
  std::cerr << "\n*** Source Manager Stats:\n";
  std::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
            << " mem buffers mapped, " << FileIDs.size()
            << " file ID's allocated.\n";
  // Classify every allocated FileID as a normal buffer or a macro expansion.
  unsigned NumBuffers = 0, NumMacros = 0;
  for (unsigned i = 0, e = FileIDs.size(); i != e; ++i) {
    if (FileIDs[i].IDType == FileIDInfo::NormalBuffer)
      ++NumBuffers;
    else if (FileIDs[i].IDType == FileIDInfo::MacroExpansion)
      ++NumMacros;
    else
      assert(0 && "Unknown FileID!");
  }
  std::cerr << "  " << NumBuffers << " normal buffer FileID's, "
            << NumMacros << " macro expansion FileID's.\n";



  // Only file-backed records are counted here; MemBufferInfos is not walked.
  unsigned NumLineNumsComputed = 0;
  unsigned NumFileBytesMapped = 0;
  for (std::map<const FileEntry *, FileInfo>::const_iterator I =
       FileInfos.begin(), E = FileInfos.end(); I != E; ++I) {
    NumLineNumsComputed += I->second.SourceLineCache != 0;
    NumFileBytesMapped  += I->second.Buffer->getBufferSize();
  }
  std::cerr << NumFileBytesMapped << " bytes of files mapped, "
            << NumLineNumsComputed << " files with line #'s computed.\n";
}
diff --git a/Basic/TargetInfo.cpp b/Basic/TargetInfo.cpp
new file mode 100644
index 0000000000..008e99b914
--- /dev/null
+++ b/Basic/TargetInfo.cpp
@@ -0,0 +1,223 @@
//===--- TargetInfo.cpp - Information about Target machine ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//  This file implements the TargetInfo and TargetInfoImpl interfaces.
//
//===----------------------------------------------------------------------===//

#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/AST/Builtins.h"
#include <map>
#include <set>
using namespace clang;

void TargetInfoImpl::ANCHOR() {} // out-of-line virtual method for class.


/// DiagnoseNonPortability - When a use of a non-portable target feature is
/// used, this method emits the diagnostic and marks the translation unit as
/// non-portable.  The diagnostic is only reported when a Diag object is set
/// and Loc is valid; the NonPortable flag is set unconditionally.
void TargetInfo::DiagnoseNonPortability(SourceLocation Loc, unsigned DiagKind) {
  NonPortable = true;
  if (Diag && Loc.isValid()) Diag->Report(Loc, DiagKind);
}

/// GetTargetDefineMap - Get the set of target #defines in an associative
/// collection for easy lookup.  Each "name=value" string from the target is
/// split at the first '='; a string with no '=' maps the name to "1".
static void GetTargetDefineMap(const TargetInfoImpl *Target,
                               std::map<std::string, std::string> &Map) {
  std::vector<std::string> PrimaryDefines;
  Target->getTargetDefines(PrimaryDefines);

  // Consume the vector from the back; map insertion order does not matter.
  while (!PrimaryDefines.empty()) {
    const char *Str = PrimaryDefines.back().c_str();
    if (const char *Equal = strchr(Str, '=')) {
      // Split at the '='.
      Map.insert(std::make_pair(std::string(Str, Equal),
                                std::string(Equal+1,
                                            Str+PrimaryDefines.back().size())));
    } else {
      // Remember "macroname=1".
      Map.insert(std::make_pair(PrimaryDefines.back(), std::string("1")));
    }
    PrimaryDefines.pop_back();
  }
}

/// getTargetDefines - Appends the target-specific #define values for this
/// target set to the specified buffer.
+void TargetInfo::getTargetDefines(std::vector<char> &Buffer) { + // This is tricky in the face of secondary targets. Specifically, + // target-specific #defines that are present and identical across all + // secondary targets are turned into #defines, #defines that are present in + // the primary target but are missing or different in the secondary targets + // are turned into #define_target, and #defines that are not defined in the + // primary, but are defined in a secondary are turned into + // #define_other_target. This allows the preprocessor to correctly track uses + // of target-specific macros. + + // Get the set of primary #defines. + std::map<std::string, std::string> PrimaryDefines; + GetTargetDefineMap(PrimaryTarget, PrimaryDefines); + + // If we have no secondary targets, be a bit more efficient. + if (SecondaryTargets.empty()) { + for (std::map<std::string, std::string>::iterator I = + PrimaryDefines.begin(), E = PrimaryDefines.end(); I != E; ++I) { + // If this define is non-portable, turn it into #define_target, otherwise + // just use #define. + const char *Command = "#define "; + Buffer.insert(Buffer.end(), Command, Command+strlen(Command)); + + // Insert "defname defvalue\n". + Buffer.insert(Buffer.end(), I->first.begin(), I->first.end()); + Buffer.push_back(' '); + Buffer.insert(Buffer.end(), I->second.begin(), I->second.end()); + Buffer.push_back('\n'); + } + return; + } + + // Get the sets of secondary #defines. + std::vector<std::map<std::string, std::string> > SecondaryDefines; + SecondaryDefines.resize(SecondaryTargets.size()); + for (unsigned i = 0, e = SecondaryTargets.size(); i != e; ++i) + GetTargetDefineMap(SecondaryTargets[i], SecondaryDefines[i]); + + // Loop over all defines in the primary target, processing them until we run + // out. 
+ while (!PrimaryDefines.empty()) { + std::string DefineName = PrimaryDefines.begin()->first; + std::string DefineValue = PrimaryDefines.begin()->second; + PrimaryDefines.erase(PrimaryDefines.begin()); + + // Check to see whether all secondary targets have this #define and whether + // it is to the same value. Remember if not, but remove the #define from + // their collection in any case if they have it. + bool isPortable = true; + + for (unsigned i = 0, e = SecondaryDefines.size(); i != e; ++i) { + std::map<std::string, std::string>::iterator I = + SecondaryDefines[i].find(DefineName); + if (I == SecondaryDefines[i].end()) { + // Secondary target doesn't have this #define. + isPortable = false; + } else { + // Secondary target has this define, remember if it disagrees. + if (isPortable) + isPortable = I->second == DefineValue; + // Remove it from the secondary target unconditionally. + SecondaryDefines[i].erase(I); + } + } + + // If this define is non-portable, turn it into #define_target, otherwise + // just use #define. + const char *Command = isPortable ? "#define " : "#define_target "; + Buffer.insert(Buffer.end(), Command, Command+strlen(Command)); + + // Insert "defname defvalue\n". + Buffer.insert(Buffer.end(), DefineName.begin(), DefineName.end()); + Buffer.push_back(' '); + Buffer.insert(Buffer.end(), DefineValue.begin(), DefineValue.end()); + Buffer.push_back('\n'); + } + + // Now that all of the primary target's defines have been handled and removed + // from the secondary target's define sets, go through the remaining secondary + // target's #defines and taint them. + for (unsigned i = 0, e = SecondaryDefines.size(); i != e; ++i) { + std::map<std::string, std::string> &Defs = SecondaryDefines[i]; + while (!Defs.empty()) { + const std::string &DefName = Defs.begin()->first; + + // Insert "#define_other_target defname". 
+ const char *Command = "#define_other_target "; + Buffer.insert(Buffer.end(), Command, Command+strlen(Command)); + Buffer.insert(Buffer.end(), DefName.begin(), DefName.end()); + Buffer.push_back('\n'); + + // If any other secondary targets have this same define, remove it from + // them to avoid duplicate #define_other_target directives. + for (unsigned j = i+1; j != e; ++j) + SecondaryDefines[j].erase(DefName); + + Defs.erase(Defs.begin()); + } + } +} + +/// ComputeWCharWidth - Determine the width of the wchar_t type for the primary +/// target, diagnosing whether this is non-portable across the secondary +/// targets. +void TargetInfo::ComputeWCharWidth(SourceLocation Loc) { + WCharWidth = PrimaryTarget->getWCharWidth(); + + // Check whether this is portable across the secondary targets if the T-U is + // portable so far. + for (unsigned i = 0, e = SecondaryTargets.size(); i != e; ++i) + if (SecondaryTargets[i]->getWCharWidth() != WCharWidth) + return DiagnoseNonPortability(Loc, diag::port_wchar_t); +} + + +/// getTargetBuiltins - Return information about target-specific builtins for +/// the current primary target, and info about which builtins are non-portable +/// across the current set of primary and secondary targets. +void TargetInfo::getTargetBuiltins(const Builtin::Info *&Records, + unsigned &NumRecords, + std::vector<const char *> &NPortable) const { + // Get info about what actual builtins we will expose. + PrimaryTarget->getTargetBuiltins(Records, NumRecords); + if (SecondaryTargets.empty()) return; + + // Compute the set of non-portable builtins. + + // Start by computing a mapping from the primary target's builtins to their + // info records for efficient lookup. + std::map<std::string, const Builtin::Info*> PrimaryRecs; + for (unsigned i = 0, e = NumRecords; i != e; ++i) + PrimaryRecs[Records[i].Name] = Records+i; + + for (unsigned i = 0, e = SecondaryTargets.size(); i != e; ++i) { + // Get the builtins for this secondary target. 
+ const Builtin::Info *Records2nd; + unsigned NumRecords2nd; + SecondaryTargets[i]->getTargetBuiltins(Records2nd, NumRecords2nd); + + // Remember all of the secondary builtin names. + std::set<std::string> BuiltinNames2nd; + + for (unsigned j = 0, e = NumRecords2nd; j != e; ++j) { + BuiltinNames2nd.insert(Records2nd[j].Name); + + // Check to see if the primary target has this builtin. + if (const Builtin::Info *PrimBI = PrimaryRecs[Records2nd[j].Name]) { + // If does. If they are not identical, mark the builtin as being + // non-portable. + if (Records2nd[j] != *PrimBI) + NPortable.push_back(PrimBI->Name); + } else { + // The primary target doesn't have this, it is non-portable. + NPortable.push_back(Records2nd[j].Name); + } + } + + // Now that we checked all the secondary builtins, check to see if the + // primary target has any builtins that the secondary one doesn't. If so, + // then those are non-portable. + for (unsigned j = 0, e = NumRecords; j != e; ++j) { + if (!BuiltinNames2nd.count(Records[j].Name)) + NPortable.push_back(Records[j].Name); + } + } +} + + diff --git a/Basic/TokenKinds.cpp b/Basic/TokenKinds.cpp new file mode 100644 index 0000000000..772925b1e5 --- /dev/null +++ b/Basic/TokenKinds.cpp @@ -0,0 +1,28 @@ +//===--- TokenKinds.cpp - Token Kinds Support -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the TokenKind enum and support functions. 
//
//===----------------------------------------------------------------------===//

#include "clang/Basic/TokenKinds.h"
#include <cassert>
using namespace clang;

// Table of token spellings, built by stringizing the first argument of every
// TOK and KEYWORD entry in TokenKinds.def, followed by a null terminator.
static const char * const TokNames[] = {
#define TOK(X)       #X,
#define KEYWORD(X,Y) #X,
#include "clang/Basic/TokenKinds.def"
  0
};

/// getTokenName - Return the TokNames entry for the given token kind.  Kind
/// must be less than tok::NUM_TOKENS.
const char *tok::getTokenName(enum TokenKind Kind) {
  assert(Kind < tok::NUM_TOKENS);
  return TokNames[Kind];
}
diff --git a/CodeGen/CGDecl.cpp b/CodeGen/CGDecl.cpp
new file mode 100644
index 0000000000..822aca3f55
--- /dev/null
+++ b/CodeGen/CGDecl.cpp
@@ -0,0 +1,119 @@
//===--- CGDecl.cpp - Emit LLVM Code for declarations ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit Decl nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CodeGenFunction.h"
#include "clang/AST/AST.h"
#include "llvm/Type.h"
using namespace clang;
using namespace CodeGen;


/// EmitDecl - Dispatch on the kind of declaration D.  Only block variables and
/// enum constants are forwarded to an emitter; the other listed kinds return
/// without emitting anything.
void CodeGenFunction::EmitDecl(const Decl &D) {

  switch (D.getKind()) {
  // The first three labels assert and then fall through to the 'return'
  // below, so builds with asserts disabled silently ignore these kinds.
  default: assert(0 && "Unknown decl kind!");
  case Decl::FileVariable:
    assert(0 && "Should not see file-scope variables inside a function!");
  case Decl::ParmVariable:
    assert(0 && "Parmdecls should not be in declstmts!");
  case Decl::Typedef:   // typedef int X;
  case Decl::Function:  // void X();
  case Decl::Struct:    // struct X;
  case Decl::Union:     // union X;
  case Decl::Class:     // class X;
  case Decl::Enum:      // enum X;
    // None of these decls require codegen support.
    return;

  case Decl::BlockVariable:
    return EmitBlockVarDecl(cast<BlockVarDecl>(D));
  case Decl::EnumConstant:
    return EmitEnumConstantDecl(cast<EnumConstantDecl>(D));
  }
}

/// EmitEnumConstantDecl - Not implemented yet; asserts unconditionally.
void CodeGenFunction::EmitEnumConstantDecl(const EnumConstantDecl &D) {
  assert(0 && "FIXME: Enum constant decls not implemented yet!");
}

/// EmitBlockVarDecl - This method handles emission of any variable declaration
/// inside a function, including static vars etc.
void CodeGenFunction::EmitBlockVarDecl(const BlockVarDecl &D) {
  switch (D.getStorageClass()) {
  // Static and Extern assert and then fall through into the default case.
  case VarDecl::Static:
    assert(0 && "FIXME: local static vars not implemented yet");
  case VarDecl::Extern:
    assert(0 && "FIXME: should call up to codegenmodule");
  default:
    assert((D.getStorageClass() == VarDecl::None ||
            D.getStorageClass() == VarDecl::Auto ||
            D.getStorageClass() == VarDecl::Register) &&
           "Unknown storage class");
    return EmitLocalBlockVarDecl(D);
  }
}

/// EmitLocalBlockVarDecl - Emit code and set up an entry in LocalDeclMap for a
/// variable declaration with auto, register, or no storage class specifier.
/// These turn into simple stack objects.
void CodeGenFunction::EmitLocalBlockVarDecl(const BlockVarDecl &D) {
  QualType Ty = D.getCanonicalType();

  llvm::Value *DeclPtr;
  if (Ty->isConstantSizeType()) {
    // A normal fixed sized variable becomes an alloca in the entry block.
    const llvm::Type *LTy = ConvertType(Ty);
    // TODO: Alignment
    DeclPtr = CreateTempAlloca(LTy, D.getName());
  } else {
    // TODO: Create a dynamic alloca.
    // NOTE(review): DeclPtr is left unset on this path; with asserts disabled
    // the code below would record an uninitialized pointer.
    assert(0 && "FIXME: Local VLAs not implemented yet");
  }

  // operator[] creates a null entry on first use, which the assert relies on.
  llvm::Value *&DMEntry = LocalDeclMap[&D];
  assert(DMEntry == 0 && "Decl already exists in localdeclmap!");
  DMEntry = DeclPtr;

  // FIXME: Evaluate initializer.
}

/// Emit an alloca for the specified parameter and set up LocalDeclMap.
void CodeGenFunction::EmitParmDecl(const ParmVarDecl &D, llvm::Value *Arg) {
  QualType Ty = D.getCanonicalType();

  llvm::Value *DeclPtr;
  if (!Ty->isConstantSizeType()) {
    // Variable sized values always are passed by-reference.
    DeclPtr = Arg;
  } else {
    // A fixed sized first class variable becomes an alloca in the entry block.
    const llvm::Type *LTy = ConvertType(Ty);
    if (LTy->isFirstClassType()) {
      // TODO: Alignment
      // The stack slot is named "<param>.addr" and placed at AllocaInsertPt.
      DeclPtr = new llvm::AllocaInst(LTy, 0, std::string(D.getName())+".addr",
                                     AllocaInsertPt);

      // Store the initial value into the alloca.
      Builder.CreateStore(Arg, DeclPtr);
    } else {
      // Otherwise, if this is an aggregate, just use the input pointer.
      DeclPtr = Arg;
    }
    // Give the incoming LLVM argument the parameter's source name.
    Arg->setName(D.getName());
  }

  // operator[] creates a null entry on first use, which the assert relies on.
  llvm::Value *&DMEntry = LocalDeclMap[&D];
  assert(DMEntry == 0 && "Decl already exists in localdeclmap!");
  DMEntry = DeclPtr;
}

diff --git a/CodeGen/CGExpr.cpp b/CodeGen/CGExpr.cpp
new file mode 100644
index 0000000000..936770e42d
--- /dev/null
+++ b/CodeGen/CGExpr.cpp
@@ -0,0 +1,1211 @@
//===--- CGExpr.cpp - Emit LLVM Code from Expressions ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit Expr nodes as LLVM code.
+// +//===----------------------------------------------------------------------===// + +#include "CodeGenFunction.h" +#include "CodeGenModule.h" +#include "clang/AST/AST.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +using namespace clang; +using namespace CodeGen; + +//===--------------------------------------------------------------------===// +// Miscellaneous Helper Methods +//===--------------------------------------------------------------------===// + +/// CreateTempAlloca - This creates a alloca and inserts it into the entry +/// block. +llvm::AllocaInst *CodeGenFunction::CreateTempAlloca(const llvm::Type *Ty, + const char *Name) { + return new llvm::AllocaInst(Ty, 0, Name, AllocaInsertPt); +} + +/// EvaluateExprAsBool - Perform the usual unary conversions on the specified +/// expression and compare the result against zero, returning an Int1Ty value. +llvm::Value *CodeGenFunction::EvaluateExprAsBool(const Expr *E) { + QualType Ty; + RValue Val = EmitExprWithUsualUnaryConversions(E, Ty); + return ConvertScalarValueToBool(Val, Ty); +} + +/// EmitLoadOfComplex - Given an RValue reference for a complex, emit code to +/// load the real and imaginary pieces, returning them as Real/Imag. +void CodeGenFunction::EmitLoadOfComplex(RValue V, + llvm::Value *&Real, llvm::Value *&Imag){ + llvm::Value *Ptr = V.getAggregateAddr(); + + llvm::Constant *Zero = llvm::ConstantInt::get(llvm::Type::Int32Ty, 0); + llvm::Constant *One = llvm::ConstantInt::get(llvm::Type::Int32Ty, 1); + llvm::Value *RealPtr = Builder.CreateGEP(Ptr, Zero, Zero, "realp"); + llvm::Value *ImagPtr = Builder.CreateGEP(Ptr, Zero, One, "imagp"); + + // FIXME: Handle volatility. + Real = Builder.CreateLoad(RealPtr, "real"); + Imag = Builder.CreateLoad(ImagPtr, "imag"); +} + +/// EmitStoreOfComplex - Store the specified real/imag parts into the +/// specified value pointer. 
+void CodeGenFunction::EmitStoreOfComplex(llvm::Value *Real, llvm::Value *Imag, + llvm::Value *ResPtr) { + llvm::Constant *Zero = llvm::ConstantInt::get(llvm::Type::Int32Ty, 0); + llvm::Constant *One = llvm::ConstantInt::get(llvm::Type::Int32Ty, 1); + llvm::Value *RealPtr = Builder.CreateGEP(ResPtr, Zero, Zero, "real"); + llvm::Value *ImagPtr = Builder.CreateGEP(ResPtr, Zero, One, "imag"); + + // FIXME: Handle volatility. + Builder.CreateStore(Real, RealPtr); + Builder.CreateStore(Imag, ImagPtr); +} + +//===--------------------------------------------------------------------===// +// Conversions +//===--------------------------------------------------------------------===// + +/// EmitConversion - Convert the value specied by Val, whose type is ValTy, to +/// the type specified by DstTy, following the rules of C99 6.3. +RValue CodeGenFunction::EmitConversion(RValue Val, QualType ValTy, + QualType DstTy) { + ValTy = ValTy.getCanonicalType(); + DstTy = DstTy.getCanonicalType(); + if (ValTy == DstTy) return Val; + + // Handle conversions to bool first, they are special: comparisons against 0. + if (const BuiltinType *DestBT = dyn_cast<BuiltinType>(DstTy)) + if (DestBT->getKind() == BuiltinType::Bool) + return RValue::get(ConvertScalarValueToBool(Val, ValTy)); + + // Handle pointer conversions next: pointers can only be converted to/from + // other pointers and integers. + if (isa<PointerType>(DstTy)) { + const llvm::Type *DestTy = ConvertType(DstTy); + + // The source value may be an integer, or a pointer. + assert(Val.isScalar() && "Can only convert from integer or pointer"); + if (isa<llvm::PointerType>(Val.getVal()->getType())) + return RValue::get(Builder.CreateBitCast(Val.getVal(), DestTy, "conv")); + assert(ValTy->isIntegerType() && "Not ptr->ptr or int->ptr conversion?"); + return RValue::get(Builder.CreatePtrToInt(Val.getVal(), DestTy, "conv")); + } + + if (isa<PointerType>(ValTy)) { + // Must be an ptr to int cast. 
+ const llvm::Type *DestTy = ConvertType(DstTy); + assert(isa<llvm::IntegerType>(DestTy) && "not ptr->int?"); + return RValue::get(Builder.CreateIntToPtr(Val.getVal(), DestTy, "conv")); + } + + // Finally, we have the arithmetic types: real int/float and complex + // int/float. Handle real->real conversions first, they are the most + // common. + if (Val.isScalar() && DstTy->isRealType()) { + // We know that these are representable as scalars in LLVM, convert to LLVM + // types since they are easier to reason about. + llvm::Value *SrcVal = Val.getVal(); + const llvm::Type *DestTy = ConvertType(DstTy); + if (SrcVal->getType() == DestTy) return Val; + + llvm::Value *Result; + if (isa<llvm::IntegerType>(SrcVal->getType())) { + bool InputSigned = ValTy->isSignedIntegerType(); + if (isa<llvm::IntegerType>(DestTy)) + Result = Builder.CreateIntCast(SrcVal, DestTy, InputSigned, "conv"); + else if (InputSigned) + Result = Builder.CreateSIToFP(SrcVal, DestTy, "conv"); + else + Result = Builder.CreateUIToFP(SrcVal, DestTy, "conv"); + } else { + assert(SrcVal->getType()->isFloatingPoint() && "Unknown real conversion"); + if (isa<llvm::IntegerType>(DestTy)) { + if (DstTy->isSignedIntegerType()) + Result = Builder.CreateFPToSI(SrcVal, DestTy, "conv"); + else + Result = Builder.CreateFPToUI(SrcVal, DestTy, "conv"); + } else { + assert(DestTy->isFloatingPoint() && "Unknown real conversion"); + if (DestTy->getTypeID() < SrcVal->getType()->getTypeID()) + Result = Builder.CreateFPTrunc(SrcVal, DestTy, "conv"); + else + Result = Builder.CreateFPExt(SrcVal, DestTy, "conv"); + } + } + return RValue::get(Result); + } + + assert(0 && "FIXME: We don't support complex conversions yet!"); +} + + +/// ConvertScalarValueToBool - Convert the specified expression value to a +/// boolean (i1) truth value. This is equivalent to "Val == 0". 
+llvm::Value *CodeGenFunction::ConvertScalarValueToBool(RValue Val, QualType Ty){ + Ty = Ty.getCanonicalType(); + llvm::Value *Result; + if (const BuiltinType *BT = dyn_cast<BuiltinType>(Ty)) { + switch (BT->getKind()) { + default: assert(0 && "Unknown scalar value"); + case BuiltinType::Bool: + Result = Val.getVal(); + // Bool is already evaluated right. + assert(Result->getType() == llvm::Type::Int1Ty && + "Unexpected bool value type!"); + return Result; + case BuiltinType::Char_S: + case BuiltinType::Char_U: + case BuiltinType::SChar: + case BuiltinType::UChar: + case BuiltinType::Short: + case BuiltinType::UShort: + case BuiltinType::Int: + case BuiltinType::UInt: + case BuiltinType::Long: + case BuiltinType::ULong: + case BuiltinType::LongLong: + case BuiltinType::ULongLong: + // Code below handles simple integers. + break; + case BuiltinType::Float: + case BuiltinType::Double: + case BuiltinType::LongDouble: { + // Compare against 0.0 for fp scalars. + Result = Val.getVal(); + llvm::Value *Zero = llvm::Constant::getNullValue(Result->getType()); + // FIXME: llvm-gcc produces a une comparison: validate this is right. + Result = Builder.CreateFCmpUNE(Result, Zero, "tobool"); + return Result; + } + } + } else if (isa<PointerType>(Ty) || + cast<TagType>(Ty)->getDecl()->getKind() == Decl::Enum) { + // Code below handles this fine. + } else { + assert(isa<ComplexType>(Ty) && "Unknwon type!"); + assert(0 && "FIXME: comparisons against complex not implemented yet"); + } + + // Usual case for integers, pointers, and enums: compare against zero. + Result = Val.getVal(); + + // Because of the type rules of C, we often end up computing a logical value, + // then zero extending it to int, then wanting it as a logical value again. + // Optimize this common case. 
+ if (llvm::ZExtInst *ZI = dyn_cast<llvm::ZExtInst>(Result)) { + if (ZI->getOperand(0)->getType() == llvm::Type::Int1Ty) { + Result = ZI->getOperand(0); + ZI->eraseFromParent(); + return Result; + } + } + + llvm::Value *Zero = llvm::Constant::getNullValue(Result->getType()); + return Builder.CreateICmpNE(Result, Zero, "tobool"); +} + +//===----------------------------------------------------------------------===// +// LValue Expression Emission +//===----------------------------------------------------------------------===// + +/// EmitLValue - Emit code to compute a designator that specifies the location +/// of the expression. +/// +/// This can return one of two things: a simple address or a bitfield +/// reference. In either case, the LLVM Value* in the LValue structure is +/// guaranteed to be an LLVM pointer type. +/// +/// If this returns a bitfield reference, nothing about the pointee type of +/// the LLVM value is known: For example, it may not be a pointer to an +/// integer. +/// +/// If this returns a normal address, and if the lvalue's C type is fixed +/// size, this method guarantees that the returned pointer type will point to +/// an LLVM type of the same size of the lvalue's type. If the lvalue has a +/// variable length type, this is not possible. 
+/// +LValue CodeGenFunction::EmitLValue(const Expr *E) { + switch (E->getStmtClass()) { + default: + fprintf(stderr, "Unimplemented lvalue expr!\n"); + E->dump(); + return LValue::MakeAddr(llvm::UndefValue::get( + llvm::PointerType::get(llvm::Type::Int32Ty))); + + case Expr::DeclRefExprClass: return EmitDeclRefLValue(cast<DeclRefExpr>(E)); + case Expr::ParenExprClass:return EmitLValue(cast<ParenExpr>(E)->getSubExpr()); + case Expr::StringLiteralClass: + return EmitStringLiteralLValue(cast<StringLiteral>(E)); + + case Expr::UnaryOperatorClass: + return EmitUnaryOpLValue(cast<UnaryOperator>(E)); + case Expr::ArraySubscriptExprClass: + return EmitArraySubscriptExpr(cast<ArraySubscriptExpr>(E)); + } +} + +/// EmitLoadOfLValue - Given an expression that represents a value lvalue, +/// this method emits the address of the lvalue, then loads the result as an +/// rvalue, returning the rvalue. +RValue CodeGenFunction::EmitLoadOfLValue(LValue LV, QualType ExprType) { + ExprType = ExprType.getCanonicalType(); + + if (LV.isSimple()) { + llvm::Value *Ptr = LV.getAddress(); + const llvm::Type *EltTy = + cast<llvm::PointerType>(Ptr->getType())->getElementType(); + + // Simple scalar l-value. + if (EltTy->isFirstClassType()) + return RValue::get(Builder.CreateLoad(Ptr, "tmp")); + + // Otherwise, we have an aggregate lvalue. + return RValue::getAggregate(Ptr); + } + + if (LV.isVectorElt()) { + llvm::Value *Vec = Builder.CreateLoad(LV.getVectorAddr(), "tmp"); + return RValue::get(Builder.CreateExtractElement(Vec, LV.getVectorIdx(), + "vecext")); + } + + assert(0 && "Bitfield ref not impl!"); +} + +RValue CodeGenFunction::EmitLoadOfLValue(const Expr *E) { + return EmitLoadOfLValue(EmitLValue(E), E->getType()); +} + + +/// EmitStoreThroughLValue - Store the specified rvalue into the specified +/// lvalue, where both are guaranteed to the have the same type, and that type +/// is 'Ty'. 
+void CodeGenFunction::EmitStoreThroughLValue(RValue Src, LValue Dst, + QualType Ty) { + if (Dst.isVectorElt()) { + // Read/modify/write the vector, inserting the new element. + // FIXME: Volatility. + llvm::Value *Vec = Builder.CreateLoad(Dst.getVectorAddr(), "tmp"); + Vec = Builder.CreateInsertElement(Vec, Src.getVal(), + Dst.getVectorIdx(), "vecins"); + Builder.CreateStore(Vec, Dst.getVectorAddr()); + return; + } + + assert(Dst.isSimple() && "FIXME: Don't support store to bitfield yet"); + + llvm::Value *DstAddr = Dst.getAddress(); + if (Src.isScalar()) { + // FIXME: Handle volatility etc. + const llvm::Type *SrcTy = Src.getVal()->getType(); + const llvm::Type *AddrTy = + cast<llvm::PointerType>(DstAddr->getType())->getElementType(); + + if (AddrTy != SrcTy) + DstAddr = Builder.CreateBitCast(DstAddr, llvm::PointerType::get(SrcTy), + "storetmp"); + Builder.CreateStore(Src.getVal(), DstAddr); + return; + } + + // Don't use memcpy for complex numbers. + if (Ty->isComplexType()) { + llvm::Value *Real, *Imag; + EmitLoadOfComplex(Src, Real, Imag); + EmitStoreOfComplex(Real, Imag, Dst.getAddress()); + return; + } + + // Aggregate assignment turns into llvm.memcpy. + const llvm::Type *SBP = llvm::PointerType::get(llvm::Type::Int8Ty); + llvm::Value *SrcAddr = Src.getAggregateAddr(); + + if (DstAddr->getType() != SBP) + DstAddr = Builder.CreateBitCast(DstAddr, SBP, "tmp"); + if (SrcAddr->getType() != SBP) + SrcAddr = Builder.CreateBitCast(SrcAddr, SBP, "tmp"); + + unsigned Align = 1; // FIXME: Compute type alignments. + unsigned Size = 1234; // FIXME: Compute type sizes. + + // FIXME: Handle variable sized types. 
+ const llvm::Type *IntPtr = llvm::IntegerType::get(LLVMPointerWidth); + llvm::Value *SizeVal = llvm::ConstantInt::get(IntPtr, Size); + + llvm::Value *MemCpyOps[4] = { + DstAddr, SrcAddr, SizeVal,llvm::ConstantInt::get(llvm::Type::Int32Ty, Align) + }; + + Builder.CreateCall(CGM.getMemCpyFn(), MemCpyOps, 4); +} + + +LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { + const Decl *D = E->getDecl(); + if (isa<BlockVarDecl>(D) || isa<ParmVarDecl>(D)) { + llvm::Value *V = LocalDeclMap[D]; + assert(V && "BlockVarDecl not entered in LocalDeclMap?"); + return LValue::MakeAddr(V); + } else if (isa<FunctionDecl>(D) || isa<FileVarDecl>(D)) { + return LValue::MakeAddr(CGM.GetAddrOfGlobalDecl(D)); + } + assert(0 && "Unimp declref"); +} + +LValue CodeGenFunction::EmitUnaryOpLValue(const UnaryOperator *E) { + // __extension__ doesn't affect lvalue-ness. + if (E->getOpcode() == UnaryOperator::Extension) + return EmitLValue(E->getSubExpr()); + + assert(E->getOpcode() == UnaryOperator::Deref && + "'*' is the only unary operator that produces an lvalue"); + return LValue::MakeAddr(EmitExpr(E->getSubExpr()).getVal()); +} + +LValue CodeGenFunction::EmitStringLiteralLValue(const StringLiteral *E) { + assert(!E->isWide() && "FIXME: Wide strings not supported yet!"); + const char *StrData = E->getStrData(); + unsigned Len = E->getByteLength(); + + // FIXME: Can cache/reuse these within the module. + llvm::Constant *C=llvm::ConstantArray::get(std::string(StrData, StrData+Len)); + + // Create a global variable for this. 
+ C = new llvm::GlobalVariable(C->getType(), true, + llvm::GlobalValue::InternalLinkage, + C, ".str", CurFn->getParent()); + llvm::Constant *Zero = llvm::Constant::getNullValue(llvm::Type::Int32Ty); + llvm::Constant *Zeros[] = { Zero, Zero }; + C = llvm::ConstantExpr::getGetElementPtr(C, Zeros, 2); + return LValue::MakeAddr(C); +} + +LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E) { + // The index must always be a pointer or integer, neither of which is an + // aggregate. Emit it. + QualType IdxTy; + llvm::Value *Idx = + EmitExprWithUsualUnaryConversions(E->getIdx(), IdxTy).getVal(); + + // If the base is a vector type, then we are forming a vector element lvalue + // with this subscript. + if (E->getBase()->getType()->isVectorType()) { + // Emit the vector as an lvalue to get its address. + LValue Base = EmitLValue(E->getBase()); + assert(Base.isSimple() && "Can only subscript lvalue vectors here!"); + // FIXME: This should properly sign/zero/extend or truncate Idx to i32. + return LValue::MakeVectorElt(Base.getAddress(), Idx); + } + + // At this point, the base must be a pointer or integer, neither of which are + // aggregates. Emit it. + QualType BaseTy; + llvm::Value *Base = + EmitExprWithUsualUnaryConversions(E->getBase(), BaseTy).getVal(); + + // Usually the base is the pointer type, but sometimes it is the index. + // Canonicalize to have the pointer as the base. + if (isa<llvm::PointerType>(Idx->getType())) { + std::swap(Base, Idx); + std::swap(BaseTy, IdxTy); + } + + // The pointer is now the base. Extend or truncate the index type to 32 or + // 64-bits. + bool IdxSigned = IdxTy->isSignedIntegerType(); + unsigned IdxBitwidth = cast<llvm::IntegerType>(Idx->getType())->getBitWidth(); + if (IdxBitwidth != LLVMPointerWidth) + Idx = Builder.CreateIntCast(Idx, llvm::IntegerType::get(LLVMPointerWidth), + IdxSigned, "idxprom"); + + // We know that the pointer points to a type of the correct size, unless the + // size is a VLA. 
+ if (!E->getType()->isConstantSizeType()) + assert(0 && "VLA idx not implemented"); + return LValue::MakeAddr(Builder.CreateGEP(Base, Idx, "arrayidx")); +} + +//===--------------------------------------------------------------------===// +// Expression Emission +//===--------------------------------------------------------------------===// + +RValue CodeGenFunction::EmitExpr(const Expr *E) { + assert(E && "Null expression?"); + + switch (E->getStmtClass()) { + default: + fprintf(stderr, "Unimplemented expr!\n"); + E->dump(); + return RValue::get(llvm::UndefValue::get(llvm::Type::Int32Ty)); + + // l-values. + case Expr::DeclRefExprClass: + // DeclRef's of EnumConstantDecl's are simple rvalues. + if (const EnumConstantDecl *EC = + dyn_cast<EnumConstantDecl>(cast<DeclRefExpr>(E)->getDecl())) + return RValue::get(llvm::ConstantInt::get(EC->getInitVal())); + return EmitLoadOfLValue(E); + case Expr::ArraySubscriptExprClass: + return EmitArraySubscriptExprRV(cast<ArraySubscriptExpr>(E)); + case Expr::StringLiteralClass: + return RValue::get(EmitLValue(E).getAddress()); + + // Leaf expressions. + case Expr::IntegerLiteralClass: + return EmitIntegerLiteral(cast<IntegerLiteral>(E)); + case Expr::FloatingLiteralClass: + return EmitFloatingLiteral(cast<FloatingLiteral>(E)); + + // Operators. 
+ case Expr::ParenExprClass: + return EmitExpr(cast<ParenExpr>(E)->getSubExpr()); + case Expr::UnaryOperatorClass: + return EmitUnaryOperator(cast<UnaryOperator>(E)); + case Expr::CastExprClass: + return EmitCastExpr(cast<CastExpr>(E)); + case Expr::CallExprClass: + return EmitCallExpr(cast<CallExpr>(E)); + case Expr::BinaryOperatorClass: + return EmitBinaryOperator(cast<BinaryOperator>(E)); + } + +} + +RValue CodeGenFunction::EmitIntegerLiteral(const IntegerLiteral *E) { + return RValue::get(llvm::ConstantInt::get(E->getValue())); +} +RValue CodeGenFunction::EmitFloatingLiteral(const FloatingLiteral *E) { + return RValue::get(llvm::ConstantFP::get(ConvertType(E->getType()), + E->getValue())); +} + + +RValue CodeGenFunction::EmitArraySubscriptExprRV(const ArraySubscriptExpr *E) { + // Emit subscript expressions in rvalue context's. For most cases, this just + // loads the lvalue formed by the subscript expr. However, we have to be + // careful, because the base of a vector subscript is occasionally an rvalue, + // so we can't get it as an lvalue. + if (!E->getBase()->getType()->isVectorType()) + return EmitLoadOfLValue(E); + + // Handle the vector case. The base must be a vector, the index must be an + // integer value. + QualType BaseTy, IdxTy; + llvm::Value *Base = + EmitExprWithUsualUnaryConversions(E->getBase(), BaseTy).getVal(); + llvm::Value *Idx = + EmitExprWithUsualUnaryConversions(E->getIdx(), IdxTy).getVal(); + + // FIXME: Convert Idx to i32 type. + + return RValue::get(Builder.CreateExtractElement(Base, Idx, "vecext")); +} + + +RValue CodeGenFunction::EmitCastExpr(const CastExpr *E) { + QualType SrcTy; + RValue Src = EmitExprWithUsualUnaryConversions(E->getSubExpr(), SrcTy); + + // If the destination is void, just evaluate the source. 
+ if (E->getType()->isVoidType()) + return RValue::getAggregate(0); + + return EmitConversion(Src, SrcTy, E->getType()); +} + +RValue CodeGenFunction::EmitCallExpr(const CallExpr *E) { + QualType CalleeTy; + llvm::Value *Callee = + EmitExprWithUsualUnaryConversions(E->getCallee(), CalleeTy).getVal(); + + // The callee type will always be a pointer to function type, get the function + // type. + CalleeTy = cast<PointerType>(CalleeTy.getCanonicalType())->getPointeeType(); + + // Get information about the argument types. + FunctionTypeProto::arg_type_iterator ArgTyIt = 0, ArgTyEnd = 0; + + // Calling unprototyped functions provides no argument info. + if (const FunctionTypeProto *FTP = dyn_cast<FunctionTypeProto>(CalleeTy)) { + ArgTyIt = FTP->arg_type_begin(); + ArgTyEnd = FTP->arg_type_end(); + } + + llvm::SmallVector<llvm::Value*, 16> Args; + + // FIXME: Handle struct return. + for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) { + QualType ArgTy; + RValue ArgVal = EmitExprWithUsualUnaryConversions(E->getArg(i), ArgTy); + + // If this argument has prototype information, convert it. + if (ArgTyIt != ArgTyEnd) { + ArgVal = EmitConversion(ArgVal, ArgTy, *ArgTyIt++); + } else { + // Otherwise, if passing through "..." or to a function with no prototype, + // perform the "default argument promotions" (C99 6.5.2.2p6), which + // includes the usual unary conversions, but also promotes float to + // double. + if (const BuiltinType *BT = + dyn_cast<BuiltinType>(ArgTy.getCanonicalType())) { + if (BT->getKind() == BuiltinType::Float) + ArgVal = RValue::get(Builder.CreateFPExt(ArgVal.getVal(), + llvm::Type::DoubleTy,"tmp")); + } + } + + + if (ArgVal.isScalar()) + Args.push_back(ArgVal.getVal()); + else // Pass by-address. FIXME: Set attribute bit on call. 
+ Args.push_back(ArgVal.getAggregateAddr()); + } + + llvm::Value *V = Builder.CreateCall(Callee, &Args[0], Args.size()); + if (V->getType() != llvm::Type::VoidTy) + V->setName("call"); + + // FIXME: Struct return; + return RValue::get(V); +} + + +//===----------------------------------------------------------------------===// +// Unary Operator Emission +//===----------------------------------------------------------------------===// + +RValue CodeGenFunction::EmitExprWithUsualUnaryConversions(const Expr *E, + QualType &ResTy) { + ResTy = E->getType().getCanonicalType(); + + if (isa<FunctionType>(ResTy)) { // C99 6.3.2.1p4 + // Functions are promoted to their address. + ResTy = getContext().getPointerType(ResTy); + return RValue::get(EmitLValue(E).getAddress()); + } else if (const ArrayType *ary = dyn_cast<ArrayType>(ResTy)) { + // C99 6.3.2.1p3 + ResTy = getContext().getPointerType(ary->getElementType()); + + // FIXME: For now we assume that all source arrays map to LLVM arrays. This + // will not true when we add support for VLAs. + llvm::Value *V = EmitLValue(E).getAddress(); // Bitfields can't be arrays. + + assert(isa<llvm::PointerType>(V->getType()) && + isa<llvm::ArrayType>(cast<llvm::PointerType>(V->getType()) + ->getElementType()) && + "Doesn't support VLAs yet!"); + llvm::Constant *Idx0 = llvm::ConstantInt::get(llvm::Type::Int32Ty, 0); + return RValue::get(Builder.CreateGEP(V, Idx0, Idx0, "arraydecay")); + } else if (ResTy->isPromotableIntegerType()) { // C99 6.3.1.1p2 + // FIXME: this probably isn't right, pending clarification from Steve. + llvm::Value *Val = EmitExpr(E).getVal(); + + // If the input is a signed integer, sign extend to the destination. + if (ResTy->isSignedIntegerType()) { + Val = Builder.CreateSExt(Val, LLVMIntTy, "promote"); + } else { + // This handles unsigned types, including bool. 
+ Val = Builder.CreateZExt(Val, LLVMIntTy, "promote"); + } + ResTy = getContext().IntTy; + + return RValue::get(Val); + } + + // Otherwise, this is a float, double, int, struct, etc. + return EmitExpr(E); +} + + +RValue CodeGenFunction::EmitUnaryOperator(const UnaryOperator *E) { + switch (E->getOpcode()) { + default: + printf("Unimplemented unary expr!\n"); + E->dump(); + return RValue::get(llvm::UndefValue::get(llvm::Type::Int32Ty)); + // FIXME: pre/post inc/dec + case UnaryOperator::AddrOf: return EmitUnaryAddrOf(E); + case UnaryOperator::Deref : return EmitLoadOfLValue(E); + case UnaryOperator::Plus : return EmitUnaryPlus(E); + case UnaryOperator::Minus : return EmitUnaryMinus(E); + case UnaryOperator::Not : return EmitUnaryNot(E); + case UnaryOperator::LNot : return EmitUnaryLNot(E); + // FIXME: SIZEOF/ALIGNOF(expr). + // FIXME: real/imag + case UnaryOperator::Extension: return EmitExpr(E->getSubExpr()); + } +} + +/// C99 6.5.3.2 +RValue CodeGenFunction::EmitUnaryAddrOf(const UnaryOperator *E) { + // The address of the operand is just its lvalue. It cannot be a bitfield. + return RValue::get(EmitLValue(E->getSubExpr()).getAddress()); +} + +RValue CodeGenFunction::EmitUnaryPlus(const UnaryOperator *E) { + // Unary plus just performs promotions on its arithmetic operand. + QualType Ty; + return EmitExprWithUsualUnaryConversions(E->getSubExpr(), Ty); +} + +RValue CodeGenFunction::EmitUnaryMinus(const UnaryOperator *E) { + // Unary minus performs promotions, then negates its arithmetic operand. + QualType Ty; + RValue V = EmitExprWithUsualUnaryConversions(E->getSubExpr(), Ty); + + if (V.isScalar()) + return RValue::get(Builder.CreateNeg(V.getVal(), "neg")); + + assert(0 && "FIXME: This doesn't handle complex operands yet"); +} + +RValue CodeGenFunction::EmitUnaryNot(const UnaryOperator *E) { + // Unary not performs promotions, then complements its integer operand. 
+ QualType Ty; + RValue V = EmitExprWithUsualUnaryConversions(E->getSubExpr(), Ty); + + if (V.isScalar()) + return RValue::get(Builder.CreateNot(V.getVal(), "neg")); + + assert(0 && "FIXME: This doesn't handle integer complex operands yet (GNU)"); +} + + +/// C99 6.5.3.3 +RValue CodeGenFunction::EmitUnaryLNot(const UnaryOperator *E) { + // Compare operand to zero. + llvm::Value *BoolVal = EvaluateExprAsBool(E->getSubExpr()); + + // Invert value. + // TODO: Could dynamically modify easy computations here. For example, if + // the operand is an icmp ne, turn into icmp eq. + BoolVal = Builder.CreateNot(BoolVal, "lnot"); + + // ZExt result to int. + return RValue::get(Builder.CreateZExt(BoolVal, LLVMIntTy, "lnot.ext")); +} + + +//===--------------------------------------------------------------------===// +// Binary Operator Emission +//===--------------------------------------------------------------------===// + +// FIXME describe. +QualType CodeGenFunction:: +EmitUsualArithmeticConversions(const BinaryOperator *E, RValue &LHS, + RValue &RHS) { + QualType LHSType, RHSType; + LHS = EmitExprWithUsualUnaryConversions(E->getLHS(), LHSType); + RHS = EmitExprWithUsualUnaryConversions(E->getRHS(), RHSType); + + // If both operands have the same source type, we're done already. + if (LHSType == RHSType) return LHSType; + + // If either side is a non-arithmetic type (e.g. a pointer), we are done. + // The caller can deal with this (e.g. pointer + int). + if (!LHSType->isArithmeticType() || !RHSType->isArithmeticType()) + return LHSType; + + // At this point, we have two different arithmetic types. + + // Handle complex types first (C99 6.3.1.8p1). + if (LHSType->isComplexType() || RHSType->isComplexType()) { + assert(0 && "FIXME: complex types unimp"); +#if 0 + // if we have an integer operand, the result is the complex type. 
+ if (rhs->isIntegerType()) + return lhs; + if (lhs->isIntegerType()) + return rhs; + return Context.maxComplexType(lhs, rhs); +#endif + } + + // If neither operand is complex, they must be scalars. + llvm::Value *LHSV = LHS.getVal(); + llvm::Value *RHSV = RHS.getVal(); + + // If the LLVM types are already equal, then they only differed in sign, or it + // was something like char/signed char or double/long double. + if (LHSV->getType() == RHSV->getType()) + return LHSType; + + // Now handle "real" floating types (i.e. float, double, long double). + if (LHSType->isRealFloatingType() || RHSType->isRealFloatingType()) { + // if we have an integer operand, the result is the real floating type, and + // the integer converts to FP. + if (RHSType->isIntegerType()) { + // Promote the RHS to an FP type of the LHS, with the sign following the + // RHS. + if (RHSType->isSignedIntegerType()) + RHS = RValue::get(Builder.CreateSIToFP(RHSV,LHSV->getType(),"promote")); + else + RHS = RValue::get(Builder.CreateUIToFP(RHSV,LHSV->getType(),"promote")); + return LHSType; + } + + if (LHSType->isIntegerType()) { + // Promote the LHS to an FP type of the RHS, with the sign following the + // LHS. + if (LHSType->isSignedIntegerType()) + LHS = RValue::get(Builder.CreateSIToFP(LHSV,RHSV->getType(),"promote")); + else + LHS = RValue::get(Builder.CreateUIToFP(LHSV,RHSV->getType(),"promote")); + return RHSType; + } + + // Otherwise, they are two FP types. Promote the smaller operand to the + // bigger result. + QualType BiggerType = ASTContext::maxFloatingType(LHSType, RHSType); + + if (BiggerType == LHSType) + RHS = RValue::get(Builder.CreateFPExt(RHSV, LHSV->getType(), "promote")); + else + LHS = RValue::get(Builder.CreateFPExt(LHSV, RHSV->getType(), "promote")); + return BiggerType; + } + + // Finally, we have two integer types that are different according to C. Do + // a sign or zero extension if needed. + + // Otherwise, one type is smaller than the other. 
+ QualType ResTy = ASTContext::maxIntegerType(LHSType, RHSType); + + if (LHSType == ResTy) { + if (RHSType->isSignedIntegerType()) + RHS = RValue::get(Builder.CreateSExt(RHSV, LHSV->getType(), "promote")); + else + RHS = RValue::get(Builder.CreateZExt(RHSV, LHSV->getType(), "promote")); + } else { + assert(RHSType == ResTy && "Unknown conversion"); + if (LHSType->isSignedIntegerType()) + LHS = RValue::get(Builder.CreateSExt(LHSV, RHSV->getType(), "promote")); + else + LHS = RValue::get(Builder.CreateZExt(LHSV, RHSV->getType(), "promote")); + } + return ResTy; +} + +/// EmitCompoundAssignmentOperands - Compound assignment operations (like +=) +/// are strange in that the result of the operation is not the same type as the +/// intermediate computation. This function emits the LHS and RHS operands of +/// the compound assignment, promoting them to their common computation type. +/// +/// Since the LHS is an lvalue, and the result is stored back through it, we +/// return the lvalue as well as the LHS/RHS rvalues. On return, the LHS and +/// RHS values are both in the computation type for the operator. +void CodeGenFunction:: +EmitCompoundAssignmentOperands(const CompoundAssignOperator *E, + LValue &LHSLV, RValue &LHS, RValue &RHS) { + LHSLV = EmitLValue(E->getLHS()); + + // Load the LHS and RHS operands. + QualType LHSTy = E->getLHS()->getType(); + LHS = EmitLoadOfLValue(LHSLV, LHSTy); + QualType RHSTy; + RHS = EmitExprWithUsualUnaryConversions(E->getRHS(), RHSTy); + + // Shift operands do the usual unary conversions, but do not do the binary + // conversions. + if (E->isShiftAssignOp()) { + // FIXME: This is broken. Implicit conversions should be made explicit, + // so that this goes away. This causes us to reload the LHS. + LHS = EmitExprWithUsualUnaryConversions(E->getLHS(), LHSTy); + } + + // Convert the LHS and RHS to the common evaluation type. 
+ LHS = EmitConversion(LHS, LHSTy, E->getComputationType()); + RHS = EmitConversion(RHS, RHSTy, E->getComputationType()); +} + +/// EmitCompoundAssignmentResult - Given a result value in the computation type, +/// truncate it down to the actual result type, store it through the LHS lvalue, +/// and return it. +RValue CodeGenFunction:: +EmitCompoundAssignmentResult(const CompoundAssignOperator *E, + LValue LHSLV, RValue ResV) { + + // Truncate back to the destination type. + if (E->getComputationType() != E->getType()) + ResV = EmitConversion(ResV, E->getComputationType(), E->getType()); + + // Store the result value into the LHS. + EmitStoreThroughLValue(ResV, LHSLV, E->getType()); + + // Return the result. + return ResV; +} + + +RValue CodeGenFunction::EmitBinaryOperator(const BinaryOperator *E) { + RValue LHS, RHS; + switch (E->getOpcode()) { + default: + fprintf(stderr, "Unimplemented binary expr!\n"); + E->dump(); + return RValue::get(llvm::UndefValue::get(llvm::Type::Int32Ty)); + case BinaryOperator::Mul: + EmitUsualArithmeticConversions(E, LHS, RHS); + return EmitMul(LHS, RHS, E->getType()); + case BinaryOperator::Div: + EmitUsualArithmeticConversions(E, LHS, RHS); + return EmitDiv(LHS, RHS, E->getType()); + case BinaryOperator::Rem: + EmitUsualArithmeticConversions(E, LHS, RHS); + return EmitRem(LHS, RHS, E->getType()); + case BinaryOperator::Add: + // FIXME: This doesn't handle ptr+int etc yet. + EmitUsualArithmeticConversions(E, LHS, RHS); + return EmitAdd(LHS, RHS, E->getType()); + case BinaryOperator::Sub: + // FIXME: This doesn't handle ptr-int etc yet. 
+ EmitUsualArithmeticConversions(E, LHS, RHS); + return EmitSub(LHS, RHS, E->getType()); + case BinaryOperator::Shl: + EmitShiftOperands(E, LHS, RHS); + return EmitShl(LHS, RHS, E->getType()); + case BinaryOperator::Shr: + EmitShiftOperands(E, LHS, RHS); + return EmitShr(LHS, RHS, E->getType()); + case BinaryOperator::And: + EmitUsualArithmeticConversions(E, LHS, RHS); + return EmitAnd(LHS, RHS, E->getType()); + case BinaryOperator::Xor: + EmitUsualArithmeticConversions(E, LHS, RHS); + return EmitXor(LHS, RHS, E->getType()); + case BinaryOperator::Or : + EmitUsualArithmeticConversions(E, LHS, RHS); + return EmitOr(LHS, RHS, E->getType()); + case BinaryOperator::LAnd: return EmitBinaryLAnd(E); + case BinaryOperator::LOr: return EmitBinaryLOr(E); + case BinaryOperator::LT: + return EmitBinaryCompare(E, llvm::ICmpInst::ICMP_ULT, + llvm::ICmpInst::ICMP_SLT, + llvm::FCmpInst::FCMP_OLT); + case BinaryOperator::GT: + return EmitBinaryCompare(E, llvm::ICmpInst::ICMP_UGT, + llvm::ICmpInst::ICMP_SGT, + llvm::FCmpInst::FCMP_OGT); + case BinaryOperator::LE: + return EmitBinaryCompare(E, llvm::ICmpInst::ICMP_ULE, + llvm::ICmpInst::ICMP_SLE, + llvm::FCmpInst::FCMP_OLE); + case BinaryOperator::GE: + return EmitBinaryCompare(E, llvm::ICmpInst::ICMP_UGE, + llvm::ICmpInst::ICMP_SGE, + llvm::FCmpInst::FCMP_OGE); + case BinaryOperator::EQ: + return EmitBinaryCompare(E, llvm::ICmpInst::ICMP_EQ, + llvm::ICmpInst::ICMP_EQ, + llvm::FCmpInst::FCMP_OEQ); + case BinaryOperator::NE: + return EmitBinaryCompare(E, llvm::ICmpInst::ICMP_NE, + llvm::ICmpInst::ICMP_NE, + llvm::FCmpInst::FCMP_UNE); + case BinaryOperator::Assign: + return EmitBinaryAssign(E); + + case BinaryOperator::MulAssign: { + const CompoundAssignOperator *CAO = cast<CompoundAssignOperator>(E); + LValue LHSLV; + EmitCompoundAssignmentOperands(CAO, LHSLV, LHS, RHS); + LHS = EmitMul(LHS, RHS, CAO->getComputationType()); + return EmitCompoundAssignmentResult(CAO, LHSLV, LHS); + } + case BinaryOperator::DivAssign: { + const 
CompoundAssignOperator *CAO = cast<CompoundAssignOperator>(E); + LValue LHSLV; + EmitCompoundAssignmentOperands(CAO, LHSLV, LHS, RHS); + LHS = EmitDiv(LHS, RHS, CAO->getComputationType()); + return EmitCompoundAssignmentResult(CAO, LHSLV, LHS); + } + case BinaryOperator::RemAssign: { + const CompoundAssignOperator *CAO = cast<CompoundAssignOperator>(E); + LValue LHSLV; + EmitCompoundAssignmentOperands(CAO, LHSLV, LHS, RHS); + LHS = EmitRem(LHS, RHS, CAO->getComputationType()); + return EmitCompoundAssignmentResult(CAO, LHSLV, LHS); + } + case BinaryOperator::AddAssign: { + const CompoundAssignOperator *CAO = cast<CompoundAssignOperator>(E); + LValue LHSLV; + EmitCompoundAssignmentOperands(CAO, LHSLV, LHS, RHS); + LHS = EmitAdd(LHS, RHS, CAO->getComputationType()); + return EmitCompoundAssignmentResult(CAO, LHSLV, LHS); + } + case BinaryOperator::SubAssign: { + const CompoundAssignOperator *CAO = cast<CompoundAssignOperator>(E); + LValue LHSLV; + EmitCompoundAssignmentOperands(CAO, LHSLV, LHS, RHS); + LHS = EmitSub(LHS, RHS, CAO->getComputationType()); + return EmitCompoundAssignmentResult(CAO, LHSLV, LHS); + } + case BinaryOperator::ShlAssign: { + const CompoundAssignOperator *CAO = cast<CompoundAssignOperator>(E); + LValue LHSLV; + EmitCompoundAssignmentOperands(CAO, LHSLV, LHS, RHS); + LHS = EmitShl(LHS, RHS, CAO->getComputationType()); + return EmitCompoundAssignmentResult(CAO, LHSLV, LHS); + } + case BinaryOperator::ShrAssign: { + const CompoundAssignOperator *CAO = cast<CompoundAssignOperator>(E); + LValue LHSLV; + EmitCompoundAssignmentOperands(CAO, LHSLV, LHS, RHS); + LHS = EmitShr(LHS, RHS, CAO->getComputationType()); + return EmitCompoundAssignmentResult(CAO, LHSLV, LHS); + } + case BinaryOperator::AndAssign: { + const CompoundAssignOperator *CAO = cast<CompoundAssignOperator>(E); + LValue LHSLV; + EmitCompoundAssignmentOperands(CAO, LHSLV, LHS, RHS); + LHS = EmitAnd(LHS, RHS, CAO->getComputationType()); + return EmitCompoundAssignmentResult(CAO, LHSLV, 
LHS); + } + case BinaryOperator::OrAssign: { + const CompoundAssignOperator *CAO = cast<CompoundAssignOperator>(E); + LValue LHSLV; + EmitCompoundAssignmentOperands(CAO, LHSLV, LHS, RHS); + LHS = EmitOr(LHS, RHS, CAO->getComputationType()); + return EmitCompoundAssignmentResult(CAO, LHSLV, LHS); + } + case BinaryOperator::XorAssign: { + const CompoundAssignOperator *CAO = cast<CompoundAssignOperator>(E); + LValue LHSLV; + EmitCompoundAssignmentOperands(CAO, LHSLV, LHS, RHS); + LHS = EmitXor(LHS, RHS, CAO->getComputationType()); + return EmitCompoundAssignmentResult(CAO, LHSLV, LHS); + } + case BinaryOperator::Comma: return EmitBinaryComma(E); + } +} + +RValue CodeGenFunction::EmitMul(RValue LHS, RValue RHS, QualType ResTy) { + if (LHS.isScalar()) + return RValue::get(Builder.CreateMul(LHS.getVal(), RHS.getVal(), "mul")); + + assert(0 && "FIXME: This doesn't handle complex operands yet"); +} + +RValue CodeGenFunction::EmitDiv(RValue LHS, RValue RHS, QualType ResTy) { + if (LHS.isScalar()) { + llvm::Value *RV; + if (LHS.getVal()->getType()->isFloatingPoint()) + RV = Builder.CreateFDiv(LHS.getVal(), RHS.getVal(), "div"); + else if (ResTy->isUnsignedIntegerType()) + RV = Builder.CreateUDiv(LHS.getVal(), RHS.getVal(), "div"); + else + RV = Builder.CreateSDiv(LHS.getVal(), RHS.getVal(), "div"); + return RValue::get(RV); + } + assert(0 && "FIXME: This doesn't handle complex operands yet"); +} + +RValue CodeGenFunction::EmitRem(RValue LHS, RValue RHS, QualType ResTy) { + if (LHS.isScalar()) { + llvm::Value *RV; + // Rem in C can't be a floating point type: C99 6.5.5p2. 
+ if (ResTy->isUnsignedIntegerType()) + RV = Builder.CreateURem(LHS.getVal(), RHS.getVal(), "rem"); + else + RV = Builder.CreateSRem(LHS.getVal(), RHS.getVal(), "rem"); + return RValue::get(RV); + } + + assert(0 && "FIXME: This doesn't handle complex operands yet"); +} + +RValue CodeGenFunction::EmitAdd(RValue LHS, RValue RHS, QualType ResTy) { + if (LHS.isScalar()) + return RValue::get(Builder.CreateAdd(LHS.getVal(), RHS.getVal(), "add")); + + // Otherwise, this must be a complex number. + llvm::Value *LHSR, *LHSI, *RHSR, *RHSI; + + EmitLoadOfComplex(LHS, LHSR, LHSI); + EmitLoadOfComplex(RHS, RHSR, RHSI); + + llvm::Value *ResR = Builder.CreateAdd(LHSR, RHSR, "add.r"); + llvm::Value *ResI = Builder.CreateAdd(LHSI, RHSI, "add.i"); + + llvm::Value *Res = CreateTempAlloca(ConvertType(ResTy)); + EmitStoreOfComplex(ResR, ResI, Res); + return RValue::getAggregate(Res); +} + +RValue CodeGenFunction::EmitSub(RValue LHS, RValue RHS, QualType ResTy) { + if (LHS.isScalar()) + return RValue::get(Builder.CreateSub(LHS.getVal(), RHS.getVal(), "sub")); + + assert(0 && "FIXME: This doesn't handle complex operands yet"); +} + +void CodeGenFunction::EmitShiftOperands(const BinaryOperator *E, + RValue &LHS, RValue &RHS) { + // For shifts, integer promotions are performed, but the usual arithmetic + // conversions are not. The LHS and RHS need not have the same type. + QualType ResTy; + LHS = EmitExprWithUsualUnaryConversions(E->getLHS(), ResTy); + RHS = EmitExprWithUsualUnaryConversions(E->getRHS(), ResTy); +} + + +RValue CodeGenFunction::EmitShl(RValue LHSV, RValue RHSV, QualType ResTy) { + llvm::Value *LHS = LHSV.getVal(), *RHS = RHSV.getVal(); + + // LLVM requires the LHS and RHS to be the same type, promote or truncate the + // RHS to the same size as the LHS. 
+ if (LHS->getType() != RHS->getType()) + RHS = Builder.CreateIntCast(RHS, LHS->getType(), false, "sh_prom"); + + return RValue::get(Builder.CreateShl(LHS, RHS, "shl")); +} + +RValue CodeGenFunction::EmitShr(RValue LHSV, RValue RHSV, QualType ResTy) { + llvm::Value *LHS = LHSV.getVal(), *RHS = RHSV.getVal(); + + // LLVM requires the LHS and RHS to be the same type, promote or truncate the + // RHS to the same size as the LHS. + if (LHS->getType() != RHS->getType()) + RHS = Builder.CreateIntCast(RHS, LHS->getType(), false, "sh_prom"); + + if (ResTy->isUnsignedIntegerType()) + return RValue::get(Builder.CreateLShr(LHS, RHS, "shr")); + else + return RValue::get(Builder.CreateAShr(LHS, RHS, "shr")); +} + +RValue CodeGenFunction::EmitBinaryCompare(const BinaryOperator *E, + unsigned UICmpOpc, unsigned SICmpOpc, + unsigned FCmpOpc) { + RValue LHS, RHS; + EmitUsualArithmeticConversions(E, LHS, RHS); + + llvm::Value *Result; + if (LHS.isScalar()) { + if (LHS.getVal()->getType()->isFloatingPoint()) { + Result = Builder.CreateFCmp((llvm::FCmpInst::Predicate)FCmpOpc, + LHS.getVal(), RHS.getVal(), "cmp"); + } else if (E->getLHS()->getType()->isUnsignedIntegerType()) { + // FIXME: This check isn't right for "unsigned short < int" where ushort + // promotes to int and does a signed compare. + Result = Builder.CreateICmp((llvm::ICmpInst::Predicate)UICmpOpc, + LHS.getVal(), RHS.getVal(), "cmp"); + } else { + // Signed integers and pointers. + Result = Builder.CreateICmp((llvm::ICmpInst::Predicate)SICmpOpc, + LHS.getVal(), RHS.getVal(), "cmp"); + } + } else { + // Struct/union/complex + assert(0 && "Aggregate comparisons not implemented yet!"); + } + + // ZExt result to int. 
+ return RValue::get(Builder.CreateZExt(Result, LLVMIntTy, "cmp.ext")); +} + +RValue CodeGenFunction::EmitAnd(RValue LHS, RValue RHS, QualType ResTy) { + if (LHS.isScalar()) + return RValue::get(Builder.CreateAnd(LHS.getVal(), RHS.getVal(), "and")); + + assert(0 && "FIXME: This doesn't handle complex integer operands yet (GNU)"); +} + +RValue CodeGenFunction::EmitXor(RValue LHS, RValue RHS, QualType ResTy) { + if (LHS.isScalar()) + return RValue::get(Builder.CreateXor(LHS.getVal(), RHS.getVal(), "xor")); + + assert(0 && "FIXME: This doesn't handle complex integer operands yet (GNU)"); +} + +RValue CodeGenFunction::EmitOr(RValue LHS, RValue RHS, QualType ResTy) { + if (LHS.isScalar()) + return RValue::get(Builder.CreateOr(LHS.getVal(), RHS.getVal(), "or")); + + assert(0 && "FIXME: This doesn't handle complex integer operands yet (GNU)"); +} + +RValue CodeGenFunction::EmitBinaryLAnd(const BinaryOperator *E) { + llvm::Value *LHSCond = EvaluateExprAsBool(E->getLHS()); + + llvm::BasicBlock *ContBlock = new llvm::BasicBlock("land_cont"); + llvm::BasicBlock *RHSBlock = new llvm::BasicBlock("land_rhs"); + + llvm::BasicBlock *OrigBlock = Builder.GetInsertBlock(); + Builder.CreateCondBr(LHSCond, RHSBlock, ContBlock); + + EmitBlock(RHSBlock); + llvm::Value *RHSCond = EvaluateExprAsBool(E->getRHS()); + + // Reaquire the RHS block, as there may be subblocks inserted. + RHSBlock = Builder.GetInsertBlock(); + EmitBlock(ContBlock); + + // Create a PHI node. If we just evaluted the LHS condition, the result is + // false. If we evaluated both, the result is the RHS condition. + llvm::PHINode *PN = Builder.CreatePHI(llvm::Type::Int1Ty, "land"); + PN->reserveOperandSpace(2); + PN->addIncoming(llvm::ConstantInt::getFalse(), OrigBlock); + PN->addIncoming(RHSCond, RHSBlock); + + // ZExt result to int. 
+ return RValue::get(Builder.CreateZExt(PN, LLVMIntTy, "land.ext")); +} + +RValue CodeGenFunction::EmitBinaryLOr(const BinaryOperator *E) { + llvm::Value *LHSCond = EvaluateExprAsBool(E->getLHS()); + + llvm::BasicBlock *ContBlock = new llvm::BasicBlock("lor_cont"); + llvm::BasicBlock *RHSBlock = new llvm::BasicBlock("lor_rhs"); + + llvm::BasicBlock *OrigBlock = Builder.GetInsertBlock(); + Builder.CreateCondBr(LHSCond, ContBlock, RHSBlock); + + EmitBlock(RHSBlock); + llvm::Value *RHSCond = EvaluateExprAsBool(E->getRHS()); + + // Reaquire the RHS block, as there may be subblocks inserted. + RHSBlock = Builder.GetInsertBlock(); + EmitBlock(ContBlock); + + // Create a PHI node. If we just evaluted the LHS condition, the result is + // true. If we evaluated both, the result is the RHS condition. + llvm::PHINode *PN = Builder.CreatePHI(llvm::Type::Int1Ty, "lor"); + PN->reserveOperandSpace(2); + PN->addIncoming(llvm::ConstantInt::getTrue(), OrigBlock); + PN->addIncoming(RHSCond, RHSBlock); + + // ZExt result to int. + return RValue::get(Builder.CreateZExt(PN, LLVMIntTy, "lor.ext")); +} + +RValue CodeGenFunction::EmitBinaryAssign(const BinaryOperator *E) { + LValue LHS = EmitLValue(E->getLHS()); + + QualType RHSTy; + RValue RHS = EmitExprWithUsualUnaryConversions(E->getRHS(), RHSTy); + + // Convert the RHS to the type of the LHS. + RHS = EmitConversion(RHS, RHSTy, E->getType()); + + // Store the value into the LHS. + EmitStoreThroughLValue(RHS, LHS, E->getType()); + + // Return the converted RHS. 
+ return RHS; +} + + +RValue CodeGenFunction::EmitBinaryComma(const BinaryOperator *E) { + EmitExpr(E->getLHS()); + return EmitExpr(E->getRHS()); +} diff --git a/CodeGen/CGStmt.cpp b/CodeGen/CGStmt.cpp new file mode 100644 index 0000000000..fb2f7841f2 --- /dev/null +++ b/CodeGen/CGStmt.cpp @@ -0,0 +1,284 @@ +//===--- CGStmt.cpp - Emit LLVM Code from Statements ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This contains code to emit Stmt nodes as LLVM code. +// +//===----------------------------------------------------------------------===// + +#include "CodeGenFunction.h" +#include "clang/AST/AST.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +using namespace clang; +using namespace CodeGen; + +//===----------------------------------------------------------------------===// +// Statement Emission +//===----------------------------------------------------------------------===// + +void CodeGenFunction::EmitStmt(const Stmt *S) { + assert(S && "Null statement?"); + + switch (S->getStmtClass()) { + default: + // Must be an expression in a stmt context. Emit the value and ignore the + // result. 
+ if (const Expr *E = dyn_cast<Expr>(S)) { + EmitExpr(E); + } else { + printf("Unimplemented stmt!\n"); + S->dump(); + } + break; + case Stmt::NullStmtClass: break; + case Stmt::CompoundStmtClass: EmitCompoundStmt(cast<CompoundStmt>(*S)); break; + case Stmt::LabelStmtClass: EmitLabelStmt(cast<LabelStmt>(*S)); break; + case Stmt::GotoStmtClass: EmitGotoStmt(cast<GotoStmt>(*S)); break; + + case Stmt::IfStmtClass: EmitIfStmt(cast<IfStmt>(*S)); break; + case Stmt::WhileStmtClass: EmitWhileStmt(cast<WhileStmt>(*S)); break; + case Stmt::DoStmtClass: EmitDoStmt(cast<DoStmt>(*S)); break; + case Stmt::ForStmtClass: EmitForStmt(cast<ForStmt>(*S)); break; + + case Stmt::ReturnStmtClass: EmitReturnStmt(cast<ReturnStmt>(*S)); break; + case Stmt::DeclStmtClass: EmitDeclStmt(cast<DeclStmt>(*S)); break; + } +} + +void CodeGenFunction::EmitCompoundStmt(const CompoundStmt &S) { + // FIXME: handle vla's etc. + + for (CompoundStmt::const_body_iterator I = S.body_begin(), E = S.body_end(); + I != E; ++I) + EmitStmt(*I); +} + +void CodeGenFunction::EmitBlock(llvm::BasicBlock *BB) { + // Emit a branch from this block to the next one if this was a real block. If + // this was just a fall-through block after a terminator, don't emit it. + llvm::BasicBlock *LastBB = Builder.GetInsertBlock(); + + if (LastBB->getTerminator()) { + // If the previous block is already terminated, don't touch it. + } else if (LastBB->empty() && LastBB->getValueName() == 0) { + // If the last block was an empty placeholder, remove it now. + // TODO: cache and reuse these. + Builder.GetInsertBlock()->eraseFromParent(); + } else { + // Otherwise, create a fall-through branch. 
+ Builder.CreateBr(BB); + } + CurFn->getBasicBlockList().push_back(BB); + Builder.SetInsertPoint(BB); +} + +void CodeGenFunction::EmitLabelStmt(const LabelStmt &S) { + llvm::BasicBlock *NextBB = getBasicBlockForLabel(&S); + + EmitBlock(NextBB); + EmitStmt(S.getSubStmt()); +} + +void CodeGenFunction::EmitGotoStmt(const GotoStmt &S) { + Builder.CreateBr(getBasicBlockForLabel(S.getLabel())); + + // Emit a block after the branch so that dead code after a goto has some place + // to go. + Builder.SetInsertPoint(new llvm::BasicBlock("", CurFn)); +} + +void CodeGenFunction::EmitIfStmt(const IfStmt &S) { + // C99 6.8.4.1: The first substatement is executed if the expression compares + // unequal to 0. The condition must be a scalar type. + llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond()); + + llvm::BasicBlock *ContBlock = new llvm::BasicBlock("ifend"); + llvm::BasicBlock *ThenBlock = new llvm::BasicBlock("ifthen"); + llvm::BasicBlock *ElseBlock = ContBlock; + + if (S.getElse()) + ElseBlock = new llvm::BasicBlock("ifelse"); + + // Insert the conditional branch. + Builder.CreateCondBr(BoolCondVal, ThenBlock, ElseBlock); + + // Emit the 'then' code. + EmitBlock(ThenBlock); + EmitStmt(S.getThen()); + Builder.CreateBr(ContBlock); + + // Emit the 'else' code if present. + if (const Stmt *Else = S.getElse()) { + EmitBlock(ElseBlock); + EmitStmt(Else); + Builder.CreateBr(ContBlock); + } + + // Emit the continuation block for code after the if. + EmitBlock(ContBlock); +} + +void CodeGenFunction::EmitWhileStmt(const WhileStmt &S) { + // FIXME: Handle continue/break. + + // Emit the header for the loop, insert it, which will create an uncond br to + // it. + llvm::BasicBlock *LoopHeader = new llvm::BasicBlock("whilecond"); + EmitBlock(LoopHeader); + + // Evaluate the conditional in the while header. C99 6.8.5.1: The evaluation + // of the controlling expression takes place before each execution of the loop + // body. 
+ llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond()); + + // TODO: while(1) is common, avoid extra exit blocks, etc. Be sure + // to correctly handle break/continue though. + + // Create an exit block for when the condition fails, create a block for the + // body of the loop. + llvm::BasicBlock *ExitBlock = new llvm::BasicBlock("whileexit"); + llvm::BasicBlock *LoopBody = new llvm::BasicBlock("whilebody"); + + // As long as the condition is true, go to the loop body. + Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock); + + // Emit the loop body. + EmitBlock(LoopBody); + EmitStmt(S.getBody()); + + // Cycle to the condition. + Builder.CreateBr(LoopHeader); + + // Emit the exit block. + EmitBlock(ExitBlock); +} + +void CodeGenFunction::EmitDoStmt(const DoStmt &S) { + // FIXME: Handle continue/break. + // TODO: "do {} while (0)" is common in macros, avoid extra blocks. Be sure + // to correctly handle break/continue though. + + // Emit the body for the loop, insert it, which will create an uncond br to + // it. + llvm::BasicBlock *LoopBody = new llvm::BasicBlock("dobody"); + llvm::BasicBlock *AfterDo = new llvm::BasicBlock("afterdo"); + EmitBlock(LoopBody); + + // Emit the body of the loop into the block. + EmitStmt(S.getBody()); + + // C99 6.8.5.2: "The evaluation of the controlling expression takes place + // after each execution of the loop body." + + // Evaluate the conditional in the while header. + // C99 6.8.5p2/p4: The first substatement is executed if the expression + // compares unequal to 0. The condition must be a scalar type. + llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond()); + + // As long as the condition is true, iterate the loop. + Builder.CreateCondBr(BoolCondVal, LoopBody, AfterDo); + + // Emit the exit block. + EmitBlock(AfterDo); +} + +void CodeGenFunction::EmitForStmt(const ForStmt &S) { + // FIXME: Handle continue/break. + // FIXME: What do we do if the increment (f.e.) 
contains a stmt expression, + // which contains a continue/break? + + // Evaluate the first part before the loop. + if (S.getInit()) + EmitStmt(S.getInit()); + + // Start the loop with a block that tests the condition. + llvm::BasicBlock *CondBlock = new llvm::BasicBlock("forcond"); + llvm::BasicBlock *AfterFor = 0; + EmitBlock(CondBlock); + + // Evaluate the condition if present. If not, treat it as a non-zero-constant + // according to 6.8.5.3p2, aka, true. + if (S.getCond()) { + // C99 6.8.5p2/p4: The first substatement is executed if the expression + // compares unequal to 0. The condition must be a scalar type. + llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond()); + + // As long as the condition is true, iterate the loop. + llvm::BasicBlock *ForBody = new llvm::BasicBlock("forbody"); + AfterFor = new llvm::BasicBlock("afterfor"); + Builder.CreateCondBr(BoolCondVal, ForBody, AfterFor); + EmitBlock(ForBody); + } else { + // Treat it as a non-zero constant. Don't even create a new block for the + // body, just fall into it. + } + + // If the condition is true, execute the body of the for stmt. + EmitStmt(S.getBody()); + + // If there is an increment, emit it next. + if (S.getInc()) + EmitExpr(S.getInc()); + + // Finally, branch back up to the condition for the next iteration. + Builder.CreateBr(CondBlock); + + // Emit the fall-through block if there is any. + if (AfterFor) + EmitBlock(AfterFor); + else + EmitBlock(new llvm::BasicBlock()); +} + +/// EmitReturnStmt - Note that due to GCC extensions, this can have an operand +/// if the function returns void, or may be missing one if the function returns +/// non-void. Fun stuff :). +void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) { + RValue RetVal; + + // Emit the result value, even if unused, to evalute the side effects. 
+ const Expr *RV = S.getRetValue(); + if (RV) + RetVal = EmitExpr(RV); + + QualType FnRetTy = CurFuncDecl->getType().getCanonicalType(); + FnRetTy = cast<FunctionType>(FnRetTy)->getResultType(); + + if (FnRetTy->isVoidType()) { + // If the function returns void, emit ret void, and ignore the retval. + Builder.CreateRetVoid(); + } else if (RV == 0) { + // "return;" in a function that returns a value. + const llvm::Type *RetTy = CurFn->getFunctionType()->getReturnType(); + if (RetTy == llvm::Type::VoidTy) + Builder.CreateRetVoid(); // struct return etc. + else + Builder.CreateRet(llvm::UndefValue::get(RetTy)); + } else { + // Do implicit conversions to the returned type. + RetVal = EmitConversion(RetVal, RV->getType(), FnRetTy); + + if (RetVal.isScalar()) { + Builder.CreateRet(RetVal.getVal()); + } else { + llvm::Value *SRetPtr = CurFn->arg_begin(); + EmitStoreThroughLValue(RetVal, LValue::MakeAddr(SRetPtr), FnRetTy); + } + } + + // Emit a block after the branch so that dead code after a return has some + // place to go. + EmitBlock(new llvm::BasicBlock()); +} + +void CodeGenFunction::EmitDeclStmt(const DeclStmt &S) { + for (const Decl *Decl = S.getDecl(); Decl; Decl = Decl->getNextDeclarator()) + EmitDecl(*Decl); +}
\ No newline at end of file diff --git a/CodeGen/CodeGenFunction.cpp b/CodeGen/CodeGenFunction.cpp new file mode 100644 index 0000000000..662c3b513c --- /dev/null +++ b/CodeGen/CodeGenFunction.cpp @@ -0,0 +1,98 @@ +//===--- CodeGenFunction.cpp - Emit LLVM Code from ASTs for a Function ----===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This coordinates the per-function state used while generating code. +// +//===----------------------------------------------------------------------===// + +#include "CodeGenFunction.h" +#include "CodeGenModule.h" +#include "clang/Basic/TargetInfo.h" +#include "clang/AST/AST.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Analysis/Verifier.h" +using namespace clang; +using namespace CodeGen; + +CodeGenFunction::CodeGenFunction(CodeGenModule &cgm) + : CGM(cgm), Target(CGM.getContext().Target) {} + +ASTContext &CodeGenFunction::getContext() const { + return CGM.getContext(); +} + + +llvm::BasicBlock *CodeGenFunction::getBasicBlockForLabel(const LabelStmt *S) { + llvm::BasicBlock *&BB = LabelMap[S]; + if (BB) return BB; + + // Create, but don't insert, the new block. 
+ return BB = new llvm::BasicBlock(S->getName()); +} + + +const llvm::Type *CodeGenFunction::ConvertType(QualType T) { + return CGM.getTypes().ConvertType(T); +} + +bool CodeGenFunction::hasAggregateLLVMType(QualType T) { + return !T->isRealType() && !T->isPointerType() && !T->isVoidType() && + !T->isVectorType(); +} + + +void CodeGenFunction::GenerateCode(const FunctionDecl *FD) { + LLVMIntTy = ConvertType(getContext().IntTy); + LLVMPointerWidth = Target.getPointerWidth(SourceLocation()); + + CurFn = cast<llvm::Function>(CGM.GetAddrOfGlobalDecl(FD)); + CurFuncDecl = FD; + + // TODO: Set up linkage and many other things. + assert(CurFn->isDeclaration() && "Function already has body?"); + + llvm::BasicBlock *EntryBB = new llvm::BasicBlock("entry", CurFn); + + Builder.SetInsertPoint(EntryBB); + + // Create a marker to make it easy to insert allocas into the entryblock + // later. + llvm::Value *Undef = llvm::UndefValue::get(llvm::Type::Int32Ty); + AllocaInsertPt = Builder.CreateBitCast(Undef,llvm::Type::Int32Ty, "allocapt"); + + // Emit allocs for param decls. Give the LLVM Argument nodes names. + llvm::Function::arg_iterator AI = CurFn->arg_begin(); + + // Name the struct return argument. + if (hasAggregateLLVMType(FD->getResultType())) { + AI->setName("agg.result"); + ++AI; + } + + for (unsigned i = 0, e = FD->getNumParams(); i != e; ++i, ++AI) { + assert(AI != CurFn->arg_end() && "Argument mismatch!"); + EmitParmDecl(*FD->getParamDecl(i), AI); + } + + // Emit the function body. + EmitStmt(FD->getBody()); + + // Emit a return for code that falls off the end. + // FIXME: if this is C++ main, this should return 0. + if (CurFn->getReturnType() == llvm::Type::VoidTy) + Builder.CreateRetVoid(); + else + Builder.CreateRet(llvm::UndefValue::get(CurFn->getReturnType())); + + // Verify that the function is well formed. 
+ assert(!verifyFunction(*CurFn)); +} + diff --git a/CodeGen/CodeGenFunction.h b/CodeGen/CodeGenFunction.h new file mode 100644 index 0000000000..acefedf3cc --- /dev/null +++ b/CodeGen/CodeGenFunction.h @@ -0,0 +1,354 @@ +//===--- CodeGenFunction.h - Per-Function state for LLVM CodeGen ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is the internal per-function state used for llvm translation. +// +//===----------------------------------------------------------------------===// + +#ifndef CODEGEN_CODEGENFUNCTION_H +#define CODEGEN_CODEGENFUNCTION_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/Support/LLVMBuilder.h" +#include <vector> + +namespace llvm { + class Module; +} + +namespace clang { + class ASTContext; + class Decl; + class FunctionDecl; + class TargetInfo; + class QualType; + class FunctionTypeProto; + + class Stmt; + class CompoundStmt; + class LabelStmt; + class GotoStmt; + class IfStmt; + class WhileStmt; + class DoStmt; + class ForStmt; + class ReturnStmt; + class DeclStmt; + + class Expr; + class DeclRefExpr; + class StringLiteral; + class IntegerLiteral; + class FloatingLiteral; + class CastExpr; + class CallExpr; + class UnaryOperator; + class BinaryOperator; + class CompoundAssignOperator; + class ArraySubscriptExpr; + + class BlockVarDecl; + class EnumConstantDecl; + class ParmVarDecl; +namespace CodeGen { + class CodeGenModule; + + +/// RValue - This trivial value class is used to represent the result of an +/// expression that is evaluated. It can be one of two things: either a simple +/// LLVM SSA value, or the address of an aggregate value in memory. These two +/// possibilities are discriminated by isAggregate/isScalar. 
+class RValue { + llvm::Value *V; + // TODO: Encode this into the low bit of pointer for more efficient + // return-by-value. + bool IsAggregate; + + // FIXME: Aggregate rvalues need to retain information about whether they are + // volatile or not. +public: + + bool isAggregate() const { return IsAggregate; } + bool isScalar() const { return !IsAggregate; } + + /// getVal() - Return the Value* of this scalar value. + llvm::Value *getVal() const { + assert(!isAggregate() && "Not a scalar!"); + return V; + } + + /// getAggregateAddr() - Return the Value* of the address of the aggregate. + llvm::Value *getAggregateAddr() const { + assert(isAggregate() && "Not an aggregate!"); + return V; + } + + static RValue get(llvm::Value *V) { + RValue ER; + ER.V = V; + ER.IsAggregate = false; + return ER; + } + static RValue getAggregate(llvm::Value *V) { + RValue ER; + ER.V = V; + ER.IsAggregate = true; + return ER; + } +}; + + +/// LValue - This represents an lvalue references. Because C/C++ allow +/// bitfields, this is not a simple LLVM pointer, it may be a pointer plus a +/// bitrange. +class LValue { + // FIXME: Volatility. Restrict? + // alignment? + + enum { + Simple, // This is a normal l-value, use getAddress(). + VectorElt, // This is a vector element l-value (V[i]), use getVector* + BitField // This is a bitfield l-value, use getBitfield*. 
+ } LVType; + + llvm::Value *V; + + union { + llvm::Value *VectorIdx; + }; +public: + bool isSimple() const { return LVType == Simple; } + bool isVectorElt() const { return LVType == VectorElt; } + bool isBitfield() const { return LVType == BitField; } + + // simple lvalue + llvm::Value *getAddress() const { assert(isSimple()); return V; } + // vector elt lvalue + llvm::Value *getVectorAddr() const { assert(isVectorElt()); return V; } + llvm::Value *getVectorIdx() const { assert(isVectorElt()); return VectorIdx; } + + static LValue MakeAddr(llvm::Value *V) { + LValue R; + R.LVType = Simple; + R.V = V; + return R; + } + + static LValue MakeVectorElt(llvm::Value *Vec, llvm::Value *Idx) { + LValue R; + R.LVType = VectorElt; + R.V = Vec; + R.VectorIdx = Idx; + return R; + } + +}; + +/// CodeGenFunction - This class organizes the per-function state that is used +/// while generating LLVM code. +class CodeGenFunction { + CodeGenModule &CGM; // Per-module state. + TargetInfo &Target; + llvm::LLVMBuilder Builder; + + const FunctionDecl *CurFuncDecl; + llvm::Function *CurFn; + + /// AllocaInsertPoint - This is an instruction in the entry block before which + /// we prefer to insert allocas. + llvm::Instruction *AllocaInsertPt; + + const llvm::Type *LLVMIntTy; + unsigned LLVMPointerWidth; + + /// LocalDeclMap - This keeps track of the LLVM allocas or globals for local C + /// decls. + llvm::DenseMap<const Decl*, llvm::Value*> LocalDeclMap; + + /// LabelMap - This keeps track of the LLVM basic block for each C label. + llvm::DenseMap<const LabelStmt*, llvm::BasicBlock*> LabelMap; +public: + CodeGenFunction(CodeGenModule &cgm); + + ASTContext &getContext() const; + + void GenerateCode(const FunctionDecl *FD); + + const llvm::Type *ConvertType(QualType T); + + /// hasAggregateLLVMType - Return true if the specified AST type will map into + /// an aggregate LLVM type or is void. 
+ static bool hasAggregateLLVMType(QualType T); + + /// getBasicBlockForLabel - Return the LLVM basicblock that the specified + /// label maps to. + llvm::BasicBlock *getBasicBlockForLabel(const LabelStmt *S); + + + void EmitBlock(llvm::BasicBlock *BB); + + //===--------------------------------------------------------------------===// + // Helpers + //===--------------------------------------------------------------------===// + + /// CreateTempAlloca - This creates a alloca and inserts it into the entry + /// block. + llvm::AllocaInst *CreateTempAlloca(const llvm::Type *Ty, + const char *Name = "tmp"); + + /// EvaluateExprAsBool - Perform the usual unary conversions on the specified + /// expression and compare the result against zero, returning an Int1Ty value. + llvm::Value *EvaluateExprAsBool(const Expr *E); + + + /// EmitLoadOfComplex - Given an RValue reference for a complex, emit code to + /// load the real and imaginary pieces, returning them as Real/Imag. + void EmitLoadOfComplex(RValue V, llvm::Value *&Real, llvm::Value *&Imag); + + /// EmitStoreOfComplex - Store the specified real/imag parts into the + /// specified value pointer. + void EmitStoreOfComplex(llvm::Value *Real, llvm::Value *Imag, + llvm::Value *ResPtr); + + //===--------------------------------------------------------------------===// + // Conversions + //===--------------------------------------------------------------------===// + + /// EmitConversion - Convert the value specied by Val, whose type is ValTy, to + /// the type specified by DstTy, following the rules of C99 6.3. + RValue EmitConversion(RValue Val, QualType ValTy, QualType DstTy); + + /// ConvertScalarValueToBool - Convert the specified expression value to a + /// boolean (i1) truth value. This is equivalent to "Val == 0". 
+ llvm::Value *ConvertScalarValueToBool(RValue Val, QualType Ty); + + //===--------------------------------------------------------------------===// + // Declaration Emission + //===--------------------------------------------------------------------===// + + void EmitDecl(const Decl &D); + void EmitEnumConstantDecl(const EnumConstantDecl &D); + void EmitBlockVarDecl(const BlockVarDecl &D); + void EmitLocalBlockVarDecl(const BlockVarDecl &D); + void EmitParmDecl(const ParmVarDecl &D, llvm::Value *Arg); + + //===--------------------------------------------------------------------===// + // Statement Emission + //===--------------------------------------------------------------------===// + + void EmitStmt(const Stmt *S); + void EmitCompoundStmt(const CompoundStmt &S); + void EmitLabelStmt(const LabelStmt &S); + void EmitGotoStmt(const GotoStmt &S); + void EmitIfStmt(const IfStmt &S); + void EmitWhileStmt(const WhileStmt &S); + void EmitDoStmt(const DoStmt &S); + void EmitForStmt(const ForStmt &S); + void EmitReturnStmt(const ReturnStmt &S); + void EmitDeclStmt(const DeclStmt &S); + + //===--------------------------------------------------------------------===// + // LValue Expression Emission + //===--------------------------------------------------------------------===// + + /// EmitLValue - Emit code to compute a designator that specifies the location + /// of the expression. + /// + /// This can return one of two things: a simple address or a bitfield + /// reference. In either case, the LLVM Value* in the LValue structure is + /// guaranteed to be an LLVM pointer type. + /// + /// If this returns a bitfield reference, nothing about the pointee type of + /// the LLVM value is known: For example, it may not be a pointer to an + /// integer. + /// + /// If this returns a normal address, and if the lvalue's C type is fixed + /// size, this method guarantees that the returned pointer type will point to + /// an LLVM type of the same size of the lvalue's type. 
If the lvalue has a + /// variable length type, this is not possible. + /// + LValue EmitLValue(const Expr *E); + + /// EmitLoadOfLValue - Given an expression that represents a value lvalue, + /// this method emits the address of the lvalue, then loads the result as an + /// rvalue, returning the rvalue. + RValue EmitLoadOfLValue(const Expr *E); + RValue EmitLoadOfLValue(LValue V, QualType LVType); + + /// EmitStoreThroughLValue - Store the specified rvalue into the specified + /// lvalue, where both are guaranteed to the have the same type, and that type + /// is 'Ty'. + void EmitStoreThroughLValue(RValue Src, LValue Dst, QualType Ty); + + LValue EmitDeclRefLValue(const DeclRefExpr *E); + LValue EmitStringLiteralLValue(const StringLiteral *E); + LValue EmitUnaryOpLValue(const UnaryOperator *E); + LValue EmitArraySubscriptExpr(const ArraySubscriptExpr *E); + + //===--------------------------------------------------------------------===// + // Expression Emission + //===--------------------------------------------------------------------===// + + RValue EmitExprWithUsualUnaryConversions(const Expr *E, QualType &ResTy); + QualType EmitUsualArithmeticConversions(const BinaryOperator *E, + RValue &LHS, RValue &RHS); + void EmitShiftOperands(const BinaryOperator *E, RValue &LHS, RValue &RHS); + + void EmitCompoundAssignmentOperands(const CompoundAssignOperator *CAO, + LValue &LHSLV, RValue &LHS, RValue &RHS); + RValue EmitCompoundAssignmentResult(const CompoundAssignOperator *E, + LValue LHSLV, RValue ResV); + + + RValue EmitExpr(const Expr *E); + RValue EmitIntegerLiteral(const IntegerLiteral *E); + RValue EmitFloatingLiteral(const FloatingLiteral *E); + + RValue EmitCastExpr(const CastExpr *E); + RValue EmitCallExpr(const CallExpr *E); + RValue EmitArraySubscriptExprRV(const ArraySubscriptExpr *E); + + // Unary Operators. 
+ RValue EmitUnaryOperator(const UnaryOperator *E); + // FIXME: pre/post inc/dec + RValue EmitUnaryAddrOf (const UnaryOperator *E); + RValue EmitUnaryPlus (const UnaryOperator *E); + RValue EmitUnaryMinus (const UnaryOperator *E); + RValue EmitUnaryNot (const UnaryOperator *E); + RValue EmitUnaryLNot (const UnaryOperator *E); + // FIXME: SIZEOF/ALIGNOF(expr). + // FIXME: real/imag + + // Binary Operators. + RValue EmitBinaryOperator(const BinaryOperator *E); + RValue EmitBinaryMul(const BinaryOperator *E); + RValue EmitBinaryDiv(const BinaryOperator *E); + RValue EmitBinaryRem(const BinaryOperator *E); + RValue EmitMul(RValue LHS, RValue RHS, QualType EltTy); + RValue EmitDiv(RValue LHS, RValue RHS, QualType EltTy); + RValue EmitRem(RValue LHS, RValue RHS, QualType EltTy); + RValue EmitAdd(RValue LHS, RValue RHS, QualType EltTy); + RValue EmitSub(RValue LHS, RValue RHS, QualType EltTy); + RValue EmitShl(RValue LHS, RValue RHS, QualType ResTy); + RValue EmitShr(RValue LHS, RValue RHS, QualType ResTy); + RValue EmitBinaryCompare(const BinaryOperator *E, unsigned UICmpOpc, + unsigned SICmpOpc, unsigned FCmpOpc); + RValue EmitAnd(RValue LHS, RValue RHS, QualType EltTy); + RValue EmitOr (RValue LHS, RValue RHS, QualType EltTy); + RValue EmitXor(RValue LHS, RValue RHS, QualType EltTy); + RValue EmitBinaryLAnd(const BinaryOperator *E); + RValue EmitBinaryLOr(const BinaryOperator *E); + + RValue EmitBinaryAssign(const BinaryOperator *E); + RValue EmitBinaryComma(const BinaryOperator *E); +}; +} // end namespace CodeGen +} // end namespace clang + +#endif diff --git a/CodeGen/CodeGenModule.cpp b/CodeGen/CodeGenModule.cpp new file mode 100644 index 0000000000..cdc3e63026 --- /dev/null +++ b/CodeGen/CodeGenModule.cpp @@ -0,0 +1,68 @@ +//===--- CodeGenModule.cpp - Emit LLVM Code from ASTs for a Module --------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source 
License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This coordinates the per-module state used while generating code. +// +//===----------------------------------------------------------------------===// + +#include "CodeGenModule.h" +#include "CodeGenFunction.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Decl.h" +#include "clang/Basic/TargetInfo.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Intrinsics.h" +using namespace clang; +using namespace CodeGen; + + +CodeGenModule::CodeGenModule(ASTContext &C, llvm::Module &M) + : Context(C), TheModule(M), Types(C.Target) {} + +llvm::Constant *CodeGenModule::GetAddrOfGlobalDecl(const Decl *D) { + // See if it is already in the map. + llvm::Constant *&Entry = GlobalDeclMap[D]; + if (Entry) return Entry; + + QualType ASTTy = cast<ValueDecl>(D)->getType(); + const llvm::Type *Ty = getTypes().ConvertType(ASTTy); + if (isa<FunctionDecl>(D)) { + const llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); + // FIXME: param attributes for sext/zext etc. + return Entry = new llvm::Function(FTy, llvm::Function::ExternalLinkage, + D->getName(), &getModule()); + } + + assert(isa<FileVarDecl>(D) && "Unknown global decl!"); + + return Entry = new llvm::GlobalVariable(Ty, false, + llvm::GlobalValue::ExternalLinkage, + 0, D->getName(), &getModule()); +} + +void CodeGenModule::EmitFunction(FunctionDecl *FD) { + // If this is not a prototype, emit the body. 
+ if (FD->getBody()) + CodeGenFunction(*this).GenerateCode(FD); +} + + + +llvm::Function *CodeGenModule::getMemCpyFn() { + if (MemCpyFn) return MemCpyFn; + llvm::Intrinsic::ID IID; + switch (Context.Target.getPointerWidth(SourceLocation())) { + default: assert(0 && "Unknown ptr width"); + case 32: IID = llvm::Intrinsic::memcpy_i32; break; + case 64: IID = llvm::Intrinsic::memcpy_i64; break; + } + return MemCpyFn = llvm::Intrinsic::getDeclaration(&TheModule, IID); +} diff --git a/CodeGen/CodeGenModule.h b/CodeGen/CodeGenModule.h new file mode 100644 index 0000000000..885fb97a9f --- /dev/null +++ b/CodeGen/CodeGenModule.h @@ -0,0 +1,60 @@ +//===--- CodeGenModule.h - Per-Module state for LLVM CodeGen --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is the internal per-translation-unit state used for llvm translation. +// +//===----------------------------------------------------------------------===// + +#ifndef CODEGEN_CODEGENMODULE_H +#define CODEGEN_CODEGENMODULE_H + +#include "CodeGenTypes.h" +#include "llvm/ADT/DenseMap.h" + +namespace llvm { + class Module; + class Constant; + class Function; +} + +namespace clang { + class ASTContext; + class FunctionDecl; + class Decl; + +namespace CodeGen { + +/// CodeGenModule - This class organizes the cross-module state that is used +/// while generating LLVM code. 
+class CodeGenModule { + ASTContext &Context; + llvm::Module &TheModule; + CodeGenTypes Types; + + llvm::Function *MemCpyFn; + llvm::DenseMap<const Decl*, llvm::Constant*> GlobalDeclMap; +public: + CodeGenModule(ASTContext &C, llvm::Module &M); + + ASTContext &getContext() const { return Context; } + llvm::Module &getModule() const { return TheModule; } + CodeGenTypes &getTypes() { return Types; } + + llvm::Constant *GetAddrOfGlobalDecl(const Decl *D); + + llvm::Function *getMemCpyFn(); + + void EmitFunction(FunctionDecl *FD); + + void PrintStats() {} +}; +} // end namespace CodeGen +} // end namespace clang + +#endif diff --git a/CodeGen/CodeGenTypes.cpp b/CodeGen/CodeGenTypes.cpp new file mode 100644 index 0000000000..f0b77908be --- /dev/null +++ b/CodeGen/CodeGenTypes.cpp @@ -0,0 +1,151 @@ +//===--- CodeGenTypes.cpp - Type translation for LLVM CodeGen -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is the code that handles AST -> LLVM type lowering. +// +//===----------------------------------------------------------------------===// + +#include "CodeGenTypes.h" +#include "clang/Basic/TargetInfo.h" +#include "clang/AST/AST.h" +#include "llvm/DerivedTypes.h" + +using namespace clang; +using namespace CodeGen; + + +/// ConvertType - Convert the specified type to its LLVM form. +const llvm::Type *CodeGenTypes::ConvertType(QualType T) { + // FIXME: Cache these, move the CodeGenModule, expand, etc. + const clang::Type &Ty = *T.getCanonicalType(); + + switch (Ty.getTypeClass()) { + case Type::Builtin: { + switch (cast<BuiltinType>(Ty).getKind()) { + case BuiltinType::Void: + // LLVM void type can only be used as the result of a function call. Just + // map to the same as char. 
+ case BuiltinType::Char_S: + case BuiltinType::Char_U: + case BuiltinType::SChar: + case BuiltinType::UChar: + return llvm::IntegerType::get(Target.getCharWidth(SourceLocation())); + + case BuiltinType::Bool: + // FIXME: This is very strange. We want scalars to be i1, but in memory + // they can be i1 or i32. Should the codegen handle this issue? + return llvm::Type::Int1Ty; + + case BuiltinType::Short: + case BuiltinType::UShort: + return llvm::IntegerType::get(Target.getShortWidth(SourceLocation())); + + case BuiltinType::Int: + case BuiltinType::UInt: + return llvm::IntegerType::get(Target.getIntWidth(SourceLocation())); + + case BuiltinType::Long: + case BuiltinType::ULong: + return llvm::IntegerType::get(Target.getLongWidth(SourceLocation())); + + case BuiltinType::LongLong: + case BuiltinType::ULongLong: + return llvm::IntegerType::get(Target.getLongLongWidth(SourceLocation())); + + case BuiltinType::Float: return llvm::Type::FloatTy; + case BuiltinType::Double: return llvm::Type::DoubleTy; + case BuiltinType::LongDouble: + // FIXME: mapping long double onto double. 
+ return llvm::Type::DoubleTy; + } + break; + } + case Type::Complex: { + std::vector<const llvm::Type*> Elts; + Elts.push_back(ConvertType(cast<ComplexType>(Ty).getElementType())); + Elts.push_back(Elts[0]); + return llvm::StructType::get(Elts); + } + case Type::Pointer: { + const PointerType &P = cast<PointerType>(Ty); + return llvm::PointerType::get(ConvertType(P.getPointeeType())); + } + case Type::Reference: { + const ReferenceType &R = cast<ReferenceType>(Ty); + return llvm::PointerType::get(ConvertType(R.getReferenceeType())); + } + + case Type::Array: { + const ArrayType &A = cast<ArrayType>(Ty); + assert(A.getSizeModifier() == ArrayType::Normal && + A.getIndexTypeQualifier() == 0 && + "FIXME: We only handle trivial array types so far!"); + + llvm::APSInt Size(32); + if (A.getSize() && A.getSize()->isIntegerConstantExpr(Size)) { + const llvm::Type *EltTy = ConvertType(A.getElementType()); + return llvm::ArrayType::get(EltTy, Size.getZExtValue()); + } else { + assert(0 && "FIXME: VLAs not implemented yet!"); + } + } + case Type::Vector: { + const VectorType &VT = cast<VectorType>(Ty); + return llvm::VectorType::get(ConvertType(VT.getElementType()), + VT.getNumElements()); + } + case Type::FunctionNoProto: + case Type::FunctionProto: { + const FunctionType &FP = cast<FunctionType>(Ty); + const llvm::Type *ResultType; + + if (FP.getResultType()->isVoidType()) + ResultType = llvm::Type::VoidTy; // Result of function uses llvm void. + else + ResultType = ConvertType(FP.getResultType()); + + // FIXME: Convert argument types. + bool isVarArg; + std::vector<const llvm::Type*> ArgTys; + + // Struct return passes the struct byref. 
+ if (!ResultType->isFirstClassType() && ResultType != llvm::Type::VoidTy) { + ArgTys.push_back(llvm::PointerType::get(ResultType)); + ResultType = llvm::Type::VoidTy; + } + + if (const FunctionTypeProto *FTP = dyn_cast<FunctionTypeProto>(&FP)) { + DecodeArgumentTypes(*FTP, ArgTys); + isVarArg = FTP->isVariadic(); + } else { + isVarArg = true; + } + + return llvm::FunctionType::get(ResultType, ArgTys, isVarArg, 0); + } + case Type::TypeName: + case Type::Tagged: + break; + } + + // FIXME: implement. + return llvm::OpaqueType::get(); +} + +void CodeGenTypes::DecodeArgumentTypes(const FunctionTypeProto &FTP, + std::vector<const llvm::Type*> &ArgTys) { + for (unsigned i = 0, e = FTP.getNumArgs(); i != e; ++i) { + const llvm::Type *Ty = ConvertType(FTP.getArgType(i)); + if (Ty->isFirstClassType()) + ArgTys.push_back(Ty); + else + ArgTys.push_back(llvm::PointerType::get(Ty)); + } +} + diff --git a/CodeGen/CodeGenTypes.h b/CodeGen/CodeGenTypes.h new file mode 100644 index 0000000000..dd322a19ec --- /dev/null +++ b/CodeGen/CodeGenTypes.h @@ -0,0 +1,47 @@ +//===--- CodeGenTypes.h - Type translation for LLVM CodeGen -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is the code that handles AST -> LLVM type lowering. +// +//===----------------------------------------------------------------------===// + +#ifndef CODEGEN_CODEGENTYPES_H +#define CODEGEN_CODEGENTYPES_H + +#include <vector> + +namespace llvm { + class Type; +} + +namespace clang { + class TargetInfo; + class QualType; + class FunctionTypeProto; + +namespace CodeGen { + +/// CodeGenTypes - This class organizes the cross-module state that is used +/// while lowering AST types to LLVM types. 
+class CodeGenTypes { + TargetInfo &Target; + +public: + CodeGenTypes(TargetInfo &target) : Target(target) {} + + TargetInfo &getTarget() const { return Target; } + + const llvm::Type *ConvertType(QualType T); + void DecodeArgumentTypes(const FunctionTypeProto &FTP, + std::vector<const llvm::Type*> &ArgTys); +}; +} // end namespace CodeGen +} // end namespace clang + +#endif diff --git a/CodeGen/Makefile b/CodeGen/Makefile new file mode 100644 index 0000000000..5e6f705846 --- /dev/null +++ b/CodeGen/Makefile @@ -0,0 +1,23 @@ +##===- clang/CodeGen/Makefile ------------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file was developed by Chris Lattner and is distributed under +# the University of Illinois Open Source License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +# +# This implements the AST -> LLVM code generation library for the +# C-Language front-end. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME := clangCodeGen +BUILD_ARCHIVE = 1 +CXXFLAGS = -fno-rtti + +CPPFLAGS += -I$(PROJ_SRC_DIR)/../include + +include $(LEVEL)/Makefile.common + diff --git a/CodeGen/ModuleBuilder.cpp b/CodeGen/ModuleBuilder.cpp new file mode 100644 index 0000000000..16b13d1c4d --- /dev/null +++ b/CodeGen/ModuleBuilder.cpp @@ -0,0 +1,39 @@ +//===--- ModuleBuilder.cpp - Emit LLVM Code from ASTs ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This builds an AST and converts it to LLVM Code. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/CodeGen/ModuleBuilder.h" +#include "CodeGenModule.h" +using namespace clang; + + +/// Init - Create an ModuleBuilder with the specified ASTContext. +clang::CodeGen::BuilderTy * +clang::CodeGen::Init(ASTContext &Context, llvm::Module &M) { + return new CodeGenModule(Context, M); +} + +void clang::CodeGen::Terminate(BuilderTy *B) { + delete static_cast<CodeGenModule*>(B); +} + +/// CodeGenFunction - Convert the AST node for a FunctionDecl into LLVM. +/// +void clang::CodeGen::CodeGenFunction(BuilderTy *B, FunctionDecl *D) { + static_cast<CodeGenModule*>(B)->EmitFunction(D); +} + +/// PrintStats - Emit statistic information to stderr. +/// +void clang::CodeGen::PrintStats(BuilderTy *B) { + static_cast<CodeGenModule*>(B)->PrintStats(); +} diff --git a/Driver/ASTStreamers.cpp b/Driver/ASTStreamers.cpp new file mode 100644 index 0000000000..19e12bd69e --- /dev/null +++ b/Driver/ASTStreamers.cpp @@ -0,0 +1,109 @@ +//===--- ASTStreamers.cpp - ASTStreamer Drivers ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Bill Wendling and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// ASTStreamer drivers. 
+// +//===----------------------------------------------------------------------===// + +#include "ASTStreamers.h" +#include "clang/AST/AST.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/Sema/ASTStreamer.h" + +void clang::BuildASTs(Preprocessor &PP, unsigned MainFileID, bool Stats) { + // collect global stats on Decls/Stmts (until we have a module streamer) + if (Stats) { + Decl::CollectingStats(true); + Stmt::CollectingStats(true); + } + + ASTContext Context(PP.getTargetInfo(), PP.getIdentifierTable()); + ASTStreamerTy *Streamer = ASTStreamer_Init(PP, Context, MainFileID); + + while (ASTStreamer_ReadTopLevelDecl(Streamer)) + /* keep reading */; + + if (Stats) { + fprintf(stderr, "\nSTATISTICS:\n"); + ASTStreamer_PrintStats(Streamer); + Context.PrintStats(); + Decl::PrintStats(); + Stmt::PrintStats(); + } + + ASTStreamer_Terminate(Streamer); +} + +void clang::PrintFunctionDecl(FunctionDecl *FD) { + bool HasBody = FD->getBody(); + + std::string Proto = FD->getName(); + FunctionType *AFT = cast<FunctionType>(FD->getType()); + + if (FunctionTypeProto *FT = dyn_cast<FunctionTypeProto>(AFT)) { + Proto += "("; + for (unsigned i = 0, e = FD->getNumParams(); i != e; ++i) { + if (i) Proto += ", "; + std::string ParamStr; + if (HasBody) ParamStr = FD->getParamDecl(i)->getName(); + + FT->getArgType(i).getAsStringInternal(ParamStr); + Proto += ParamStr; + } + + if (FT->isVariadic()) { + if (FD->getNumParams()) Proto += ", "; + Proto += "..."; + } + Proto += ")"; + } else { + assert(isa<FunctionTypeNoProto>(AFT)); + Proto += "()"; + } + + AFT->getResultType().getAsStringInternal(Proto); + fprintf(stderr, "\n%s", Proto.c_str()); + + if (FD->getBody()) { + fprintf(stderr, " "); + FD->getBody()->dump(); + fprintf(stderr, "\n"); + } else { + fprintf(stderr, ";\n"); + } +} + +void clang::PrintTypeDefDecl(TypedefDecl *TD) { + std::string S = TD->getName(); + TD->getUnderlyingType().getAsStringInternal(S); + fprintf(stderr, "typedef %s;\n", S.c_str()); +} + +void 
clang::PrintASTs(Preprocessor &PP, unsigned MainFileID, bool Stats) { + ASTContext Context(PP.getTargetInfo(), PP.getIdentifierTable()); + ASTStreamerTy *Streamer = ASTStreamer_Init(PP, Context, MainFileID); + + while (Decl *D = ASTStreamer_ReadTopLevelDecl(Streamer)) { + if (FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) { + PrintFunctionDecl(FD); + } else if (TypedefDecl *TD = dyn_cast<TypedefDecl>(D)) { + PrintTypeDefDecl(TD); + } else { + fprintf(stderr, "Read top-level variable decl: '%s'\n", D->getName()); + } + } + + if (Stats) { + fprintf(stderr, "\nSTATISTICS:\n"); + ASTStreamer_PrintStats(Streamer); + Context.PrintStats(); + } + + ASTStreamer_Terminate(Streamer); +} diff --git a/Driver/ASTStreamers.h b/Driver/ASTStreamers.h new file mode 100644 index 0000000000..2cce217ce6 --- /dev/null +++ b/Driver/ASTStreamers.h @@ -0,0 +1,30 @@ +//===--- ASTStreamers.h - ASTStreamer Drivers -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Bill Wendling and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// AST Streamers. 
+// +//===----------------------------------------------------------------------===// + +#ifndef DRIVER_ASTSTREAMERS_H_ +#define DRIVER_ASTSTREAMERS_H_ + +namespace clang { + +class Preprocessor; +class FunctionDecl; +class TypedefDecl; + +void BuildASTs(Preprocessor &PP, unsigned MainFileID, bool Stats); +void PrintASTs(Preprocessor &PP, unsigned MainFileID, bool Stats); +void PrintFunctionDecl(FunctionDecl *FD); +void PrintTypeDefDecl(TypedefDecl *TD); + +} // end clang namespace + +#endif diff --git a/Driver/DiagChecker.cpp b/Driver/DiagChecker.cpp new file mode 100644 index 0000000000..76b0526d4e --- /dev/null +++ b/Driver/DiagChecker.cpp @@ -0,0 +1,230 @@ +//===--- DiagChecker.cpp - Diagnostic Checking Functions ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Bill Wendling and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Process the input files and check that the diagnostic messages are expected. +// +//===----------------------------------------------------------------------===// + +#include "clang.h" +#include "ASTStreamers.h" +#include "TextDiagnosticBuffer.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Lex/Preprocessor.h" +using namespace clang; + +typedef TextDiagnosticBuffer::DiagList DiagList; +typedef TextDiagnosticBuffer::const_iterator const_diag_iterator; + +// USING THE DIAGNOSTIC CHECKER: +// +// Indicating that a line expects an error or a warning is simple. Put a comment +// on the line that has the diagnostic, use "expected-{error,warning}" to tag +// if it's an expected error or warning, and place the expected text between {{ +// and }} markers. The full text doesn't have to be included, only enough to +// ensure that the correct diagnostic was emitted. 
+// +// Here's an example: +// +// int A = B; // expected-error {{use of undeclared identifier 'B'}} +// +// You can place as many diagnostics on one line as you wish. To make the code +// more readable, you can use slash-newline to separate out the diagnostics. + +static const char * const ExpectedErrStr = "expected-error"; +static const char * const ExpectedWarnStr = "expected-warning"; + +/// FindDiagnostics - Go through the comment and see if it indicates expected +/// diagnostics. If so, then put them in a diagnostic list. +/// +static void FindDiagnostics(const std::string &Comment, + DiagList &ExpectedDiags, + SourceManager &SourceMgr, + SourceLocation Pos, + const char * const ExpectedStr) { + // Find all expected diagnostics + typedef std::string::size_type size_type; + size_type ColNo = std::string::npos; + + for (;;) { + ColNo = Comment.find(ExpectedStr, ColNo); + if (ColNo == std::string::npos) break; + + size_type OpenDiag = Comment.find_first_of("{{", ColNo); + + if (OpenDiag == std::string::npos) { + fprintf(stderr, + "oops:%d: Cannot find beginning of expected error string\n", + SourceMgr.getLineNumber(Pos)); + break; + } + + OpenDiag += 2; + size_type CloseDiag = Comment.find_first_of("}}", OpenDiag); + + if (CloseDiag == std::string::npos) { + fprintf(stderr, + "oops:%d: Cannot find end of expected error string\n", + SourceMgr.getLineNumber(Pos)); + break; + } + + std::string Msg(Comment.substr(OpenDiag, CloseDiag - OpenDiag)); + ExpectedDiags.push_back(std::make_pair(Pos, Msg)); + ColNo = CloseDiag + 2; + } +} + +/// FindExpectedDiags - Lex the file to finds all of the expected errors and +/// warnings. +static void FindExpectedDiags(Preprocessor &PP, unsigned MainFileID, + DiagList &ExpectedErrors, + DiagList &ExpectedWarnings) { + // Return comments as tokens, this is how we find expected diagnostics. + PP.SetCommentRetentionState(true, true); + + // Enter the cave. 
+ PP.EnterSourceFile(MainFileID, 0, true); + + LexerToken Tok; + do { + PP.Lex(Tok); + + if (Tok.getKind() == tok::comment) { + std::string Comment = PP.getSpelling(Tok); + + // Find all expected errors + FindDiagnostics(Comment, ExpectedErrors,PP.getSourceManager(), + Tok.getLocation(), ExpectedErrStr); + + // Find all expected warnings + FindDiagnostics(Comment, ExpectedWarnings, PP.getSourceManager(), + Tok.getLocation(), ExpectedWarnStr); + } + } while (Tok.getKind() != tok::eof); + + PP.SetCommentRetentionState(false, false); +} + +/// PrintProblem - This takes a diagnostic map of the delta between expected and +/// seen diagnostics. If there's anything in it, then something unexpected +/// happened. Print the map out in a nice format and return "true". If the map +/// is empty and we're not going to print things, then return "false". +/// +static bool PrintProblem(SourceManager &SourceMgr, + const_diag_iterator diag_begin, + const_diag_iterator diag_end, + const char *Msg) { + if (diag_begin == diag_end) return false; + + fprintf(stderr, "%s\n", Msg); + + for (const_diag_iterator I = diag_begin, E = diag_end; I != E; ++I) + fprintf(stderr, " Line %d: %s\n", + SourceMgr.getLineNumber(I->first), + I->second.c_str()); + + return true; +} + +/// CompareDiagLists - Compare two diangnostic lists and return the difference +/// between them. 
+/// +static bool CompareDiagLists(SourceManager &SourceMgr, + const_diag_iterator d1_begin, + const_diag_iterator d1_end, + const_diag_iterator d2_begin, + const_diag_iterator d2_end, + const char *Msg) { + DiagList DiffList; + + for (const_diag_iterator I = d1_begin, E = d1_end; I != E; ++I) { + unsigned LineNo1 = SourceMgr.getLineNumber(I->first); + const std::string &Diag1 = I->second; + bool Found = false; + + for (const_diag_iterator II = d2_begin, IE = d2_end; II != IE; ++II) { + unsigned LineNo2 = SourceMgr.getLineNumber(II->first); + if (LineNo1 != LineNo2) continue; + + const std::string &Diag2 = II->second; + if (Diag2.find(Diag1) != std::string::npos || + Diag1.find(Diag2) != std::string::npos) { + Found = true; + break; + } + } + + if (!Found) + DiffList.push_back(std::make_pair(I->first, Diag1)); + } + + return PrintProblem(SourceMgr, DiffList.begin(), DiffList.end(), Msg); +} + +/// CheckResults - This compares the expected results to those that +/// were actually reported. It emits any discrepencies. Return "true" if there +/// were problems. Return "false" otherwise. +/// +static bool CheckResults(Preprocessor &PP, + const DiagList &ExpectedErrors, + const DiagList &ExpectedWarnings) { + const TextDiagnosticBuffer &Diags = + static_cast<const TextDiagnosticBuffer&>(PP.getDiagnostics().getClient()); + SourceManager &SourceMgr = PP.getSourceManager(); + + // We want to capture the delta between what was expected and what was + // seen. + // + // Expected \ Seen - set expected but not seen + // Seen \ Expected - set seen but not expected + bool HadProblem = false; + + // See if there were errors that were expected but not seen. + HadProblem |= CompareDiagLists(SourceMgr, + ExpectedErrors.begin(), ExpectedErrors.end(), + Diags.err_begin(), Diags.err_end(), + "Errors expected but not seen:"); + + // See if there were errors that were seen but not expected. 
+ HadProblem |= CompareDiagLists(SourceMgr, + Diags.err_begin(), Diags.err_end(), + ExpectedErrors.begin(), ExpectedErrors.end(), + "Errors seen but not expected:"); + + // See if there were warnings that were expected but not seen. + HadProblem |= CompareDiagLists(SourceMgr, + ExpectedWarnings.begin(), + ExpectedWarnings.end(), + Diags.warn_begin(), Diags.warn_end(), + "Warnings expected but not seen:"); + + // See if there were warnings that were seen but not expected. + HadProblem |= CompareDiagLists(SourceMgr, + Diags.warn_begin(), Diags.warn_end(), + ExpectedWarnings.begin(), + ExpectedWarnings.end(), + "Warnings seen but not expected:"); + + return HadProblem; +} + +/// CheckDiagnostics - Implement the -parse-ast-check diagnostic verifier. +bool clang::CheckDiagnostics(Preprocessor &PP, unsigned MainFileID) { + // Gather the set of expected diagnostics. + DiagList ExpectedErrors, ExpectedWarnings; + FindExpectedDiags(PP, MainFileID, ExpectedErrors, ExpectedWarnings); + + // Parse the specified input file. + BuildASTs(PP, MainFileID, false); + + // Check that the expected diagnostics occurred. + return CheckResults(PP, ExpectedErrors, ExpectedWarnings); +} + + diff --git a/Driver/LLVMCodegen.cpp b/Driver/LLVMCodegen.cpp new file mode 100644 index 0000000000..e593b66cfd --- /dev/null +++ b/Driver/LLVMCodegen.cpp @@ -0,0 +1,68 @@ +//===--- LLVMCodegen.cpp - Emit LLVM Code from ASTs -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This builds an AST and converts it to LLVM Code. 
+// +//===----------------------------------------------------------------------===// + +#include "clang.h" +#include "clang/CodeGen/ModuleBuilder.h" +#include "clang/Sema/ASTStreamer.h" +#include "clang/AST/AST.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/Basic/Diagnostic.h" +#include "llvm/Module.h" +#include <iostream> +using namespace clang; + +//===----------------------------------------------------------------------===// +// LLVM Emission +//===----------------------------------------------------------------------===// + +void clang::EmitLLVMFromASTs(Preprocessor &PP, unsigned MainFileID, + bool PrintStats) { + Diagnostic &Diags = PP.getDiagnostics(); + // Create the streamer to read the file. + ASTContext Context(PP.getTargetInfo(), PP.getIdentifierTable()); + ASTStreamerTy *Streamer = ASTStreamer_Init(PP, Context, MainFileID); + + // Create the module to codegen into. + llvm::Module M("foo"); + + CodeGen::BuilderTy *Builder = CodeGen::Init(Context, M); + + while (Decl *D = ASTStreamer_ReadTopLevelDecl(Streamer)) { + // If an error occurred, stop code generation, but continue parsing and + // semantic analysis (to ensure all warnings and errors are emitted). + if (Diags.hasErrorOccurred()) + continue; + + if (FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) { + CodeGen::CodeGenFunction(Builder, FD); + } else if (isa<TypedefDecl>(D)) { + std::cerr << "Read top-level typedef decl: '" << D->getName() << "'\n"; + } else { + std::cerr << "Read top-level variable decl: '" << D->getName() << "'\n"; + } + } + + if (PrintStats) { + std::cerr << "\nSTATISTICS:\n"; + CodeGen::PrintStats(Builder); + ASTStreamer_PrintStats(Streamer); + Context.PrintStats(); + } + + CodeGen::Terminate(Builder); + ASTStreamer_Terminate(Streamer); + + // Print the generated code. + M.print(std::cout); +} + diff --git a/Driver/Makefile b/Driver/Makefile new file mode 100644 index 0000000000..4c9db0dc2d --- /dev/null +++ b/Driver/Makefile @@ -0,0 +1,8 @@ +LEVEL = ../../.. 
+CPPFLAGS += -I$(PROJ_SRC_DIR)/../include +CXXFLAGS = -fno-rtti + +TOOLNAME = clang +USEDLIBS = clangCodeGen.a clangSEMA.a clangAST.a clangParse.a clangLex.a clangBasic.a LLVMCore.a LLVMSupport.a LLVMSystem.a + +include $(LEVEL)/Makefile.common diff --git a/Driver/PPCBuiltins.def b/Driver/PPCBuiltins.def new file mode 100644 index 0000000000..6aed2caa4d --- /dev/null +++ b/Driver/PPCBuiltins.def @@ -0,0 +1,24 @@ +//===--- PPCBuiltins.def - PowerPC Builtin function database ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the PowerPC-specific builtin function database. Users of +// this file must define the BUILTIN macro to make use of this information. +// +//===----------------------------------------------------------------------===// + +// FIXME: this needs to be the full list supported by GCC. Right now, I'm just +// adding stuff on demand. + +// The format of this database matches clang/AST/Builtins.def. + +// This is just a placeholder, the types and attributes are wrong. +BUILTIN(__builtin_altivec_abs_v4sf , "ii" , "nc") +// FIXME: Obviously incomplete. + +#undef BUILTIN diff --git a/Driver/PrintParserCallbacks.cpp b/Driver/PrintParserCallbacks.cpp new file mode 100644 index 0000000000..3730d19a7d --- /dev/null +++ b/Driver/PrintParserCallbacks.cpp @@ -0,0 +1,55 @@ +//===--- PrintParserActions.cpp - Implement -parse-print-callbacks mode ---===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This code simply runs the preprocessor on the input file and prints out the +// result. This is the traditional behavior of the -E option. +// +//===----------------------------------------------------------------------===// + +#include "clang.h" +#include "clang/Lex/IdentifierTable.h" +#include "clang/Parse/Action.h" +#include "clang/Parse/DeclSpec.h" +#include <iostream> +using namespace clang; + +namespace { + class ParserPrintActions : public MinimalAction { + + /// ParseDeclarator - This callback is invoked when a declarator is parsed + /// and 'Init' specifies the initializer if any. This is for things like: + /// "int X = 4" or "typedef int foo". + virtual DeclTy *ParseDeclarator(Scope *S, Declarator &D, ExprTy *Init, + DeclTy *LastInGroup) { + std::cout << "ParseDeclarator "; + if (IdentifierInfo *II = D.getIdentifier()) { + std::cout << "'" << II->getName() << "'"; + } else { + std::cout << "<anon>"; + } + std::cout << "\n"; + + // Pass up to EmptyActions so that the symbol table is maintained right. + return MinimalAction::ParseDeclarator(S, D, Init, LastInGroup); + } + + /// PopScope - This callback is called immediately before the specified scope + /// is popped and deleted. + virtual void PopScope(SourceLocation Loc, Scope *S) { + std::cout << "PopScope\n"; + + // Pass up to EmptyActions so that the symbol table is maintained right. 
+ MinimalAction::PopScope(Loc, S); + } + }; +} + +MinimalAction *clang::CreatePrintParserActionsAction() { + return new ParserPrintActions(); +} diff --git a/Driver/PrintPreprocessedOutput.cpp b/Driver/PrintPreprocessedOutput.cpp new file mode 100644 index 0000000000..4a4f6783da --- /dev/null +++ b/Driver/PrintPreprocessedOutput.cpp @@ -0,0 +1,436 @@ +//===--- PrintPreprocessedOutput.cpp - Implement the -E mode --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This code simply runs the preprocessor on the input file and prints out the +// result. This is the traditional behavior of the -E option. +// +//===----------------------------------------------------------------------===// + +#include "clang.h" +#include "clang/Lex/PPCallbacks.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/Lex/Pragma.h" +#include "clang/Basic/SourceManager.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Config/config.h" +#include <cstdio> +using namespace clang; + +//===----------------------------------------------------------------------===// +// Simple buffered I/O +//===----------------------------------------------------------------------===// +// +// Empirically, iostream is over 30% slower than stdio for this workload, and +// stdio itself isn't very well suited. The problem with stdio is use of +// putchar_unlocked. We have many newline characters that need to be emitted, +// but stdio needs to do extra checks to handle line buffering mode. These +// extra checks make putchar_unlocked fall off its inlined code path, hitting +// slow system code. In practice, using 'write' directly makes 'clang -E -P' +// about 10% faster than using the stdio path on darwin. 
+ +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#else +#define USE_STDIO 1 +#endif + +static char *OutBufStart = 0, *OutBufEnd, *OutBufCur; + +/// InitOutputBuffer - Initialize our output buffer. +/// +static void InitOutputBuffer() { +#ifndef USE_STDIO + OutBufStart = new char[64*1024]; + OutBufEnd = OutBufStart+64*1024; + OutBufCur = OutBufStart; +#endif +} + +/// FlushBuffer - Write the accumulated bytes to the output stream. +/// +static void FlushBuffer() { +#ifndef USE_STDIO + write(STDOUT_FILENO, OutBufStart, OutBufCur-OutBufStart); + OutBufCur = OutBufStart; +#endif +} + +/// CleanupOutputBuffer - Finish up output. +/// +static void CleanupOutputBuffer() { +#ifndef USE_STDIO + FlushBuffer(); + delete [] OutBufStart; +#endif +} + +static void OutputChar(char c) { +#ifdef USE_STDIO + putchar_unlocked(c); +#else + if (OutBufCur >= OutBufEnd) + FlushBuffer(); + *OutBufCur++ = c; +#endif +} + +static void OutputString(const char *Ptr, unsigned Size) { +#ifdef USE_STDIO + fwrite(Ptr, Size, 1, stdout); +#else + if (OutBufCur+Size >= OutBufEnd) + FlushBuffer(); + memcpy(OutBufCur, Ptr, Size); + OutBufCur += Size; +#endif +} + + +//===----------------------------------------------------------------------===// +// Preprocessed token printer +//===----------------------------------------------------------------------===// + +static llvm::cl::opt<bool> +DisableLineMarkers("P", llvm::cl::desc("Disable linemarker output in -E mode")); +static llvm::cl::opt<bool> +EnableCommentOutput("C", llvm::cl::desc("Enable comment output in -E mode")); +static llvm::cl::opt<bool> +EnableMacroCommentOutput("CC", + llvm::cl::desc("Enable comment output in -E mode, " + "even from macro expansions")); + +namespace { +class PrintPPOutputPPCallbacks : public PPCallbacks { + Preprocessor &PP; + unsigned CurLine; + std::string CurFilename; + bool EmittedTokensOnThisLine; + DirectoryLookup::DirType FileType; +public: + PrintPPOutputPPCallbacks(Preprocessor &pp) : PP(pp) { + CurLine = 0; + 
CurFilename = "\"<uninit>\""; + EmittedTokensOnThisLine = false; + FileType = DirectoryLookup::NormalHeaderDir; + } + + void SetEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; } + + virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason, + DirectoryLookup::DirType FileType); + virtual void Ident(SourceLocation Loc, const std::string &str); + + + void HandleFirstTokOnLine(LexerToken &Tok); + void MoveToLine(SourceLocation Loc); + bool AvoidConcat(const LexerToken &PrevTok, const LexerToken &Tok); +}; +} + +/// MoveToLine - Move the output to the source line specified by the location +/// object. We can do this by emitting some number of \n's, or be emitting a +/// #line directive. +void PrintPPOutputPPCallbacks::MoveToLine(SourceLocation Loc) { + if (DisableLineMarkers) { + if (EmittedTokensOnThisLine) { + OutputChar('\n'); + EmittedTokensOnThisLine = false; + } + return; + } + + unsigned LineNo = PP.getSourceManager().getLineNumber(Loc); + + // If this line is "close enough" to the original line, just print newlines, + // otherwise print a #line directive. + if (LineNo-CurLine < 8) { + unsigned Line = CurLine; + for (; Line != LineNo; ++Line) + OutputChar('\n'); + CurLine = Line; + } else { + if (EmittedTokensOnThisLine) { + OutputChar('\n'); + EmittedTokensOnThisLine = false; + } + + CurLine = LineNo; + + OutputChar('#'); + OutputChar(' '); + std::string Num = llvm::utostr_32(LineNo); + OutputString(&Num[0], Num.size()); + OutputChar(' '); + OutputString(&CurFilename[0], CurFilename.size()); + + if (FileType == DirectoryLookup::SystemHeaderDir) + OutputString(" 3", 2); + else if (FileType == DirectoryLookup::ExternCSystemHeaderDir) + OutputString(" 3 4", 4); + OutputChar('\n'); + } +} + + +/// FileChanged - Whenever the preprocessor enters or exits a #include file +/// it invokes this handler. Update our conception of the current source +/// position. 
+void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc, + FileChangeReason Reason, + DirectoryLookup::DirType FileType) { + if (DisableLineMarkers) return; + + // Unless we are exiting a #include, make sure to skip ahead to the line the + // #include directive was at. + SourceManager &SourceMgr = PP.getSourceManager(); + if (Reason == PPCallbacks::EnterFile) { + MoveToLine(SourceMgr.getIncludeLoc(Loc.getFileID())); + } else if (Reason == PPCallbacks::SystemHeaderPragma) { + MoveToLine(Loc); + + // TODO GCC emits the # directive for this directive on the line AFTER the + // directive and emits a bunch of spaces that aren't needed. Emulate this + // strange behavior. + } + + CurLine = SourceMgr.getLineNumber(Loc); + CurFilename = '"' + Lexer::Stringify(SourceMgr.getSourceName(Loc)) + '"'; + FileType = FileType; + + if (EmittedTokensOnThisLine) { + OutputChar('\n'); + EmittedTokensOnThisLine = false; + } + + if (DisableLineMarkers) return; + + OutputChar('#'); + OutputChar(' '); + std::string Num = llvm::utostr_32(CurLine); + OutputString(&Num[0], Num.size()); + OutputChar(' '); + OutputString(&CurFilename[0], CurFilename.size()); + + switch (Reason) { + case PPCallbacks::EnterFile: + OutputString(" 1", 2); + break; + case PPCallbacks::ExitFile: + OutputString(" 2", 2); + break; + case PPCallbacks::SystemHeaderPragma: break; + case PPCallbacks::RenameFile: break; + } + + if (FileType == DirectoryLookup::SystemHeaderDir) + OutputString(" 3", 2); + else if (FileType == DirectoryLookup::ExternCSystemHeaderDir) + OutputString(" 3 4", 4); + + OutputChar('\n'); +} + +/// HandleIdent - Handle #ident directives when read by the preprocessor. 
+/// +void PrintPPOutputPPCallbacks::Ident(SourceLocation Loc, const std::string &S) { + MoveToLine(Loc); + + OutputString("#ident ", strlen("#ident ")); + OutputString(&S[0], S.size()); + EmittedTokensOnThisLine = true; +} + +/// HandleFirstTokOnLine - When emitting a preprocessed file in -E mode, this +/// is called for the first token on each new line. +void PrintPPOutputPPCallbacks::HandleFirstTokOnLine(LexerToken &Tok) { + // Figure out what line we went to and insert the appropriate number of + // newline characters. + MoveToLine(Tok.getLocation()); + + // Print out space characters so that the first token on a line is + // indented for easy reading. + unsigned ColNo = + PP.getSourceManager().getColumnNumber(Tok.getLocation()); + + // This hack prevents stuff like: + // #define HASH # + // HASH define foo bar + // From having the # character end up at column 1, which makes it so it + // is not handled as a #define next time through the preprocessor if in + // -fpreprocessed mode. + if (ColNo <= 1 && Tok.getKind() == tok::hash) + OutputChar(' '); + + // Otherwise, indent the appropriate number of spaces. + for (; ColNo > 1; --ColNo) + OutputChar(' '); +} + +namespace { +struct UnknownPragmaHandler : public PragmaHandler { + const char *Prefix; + PrintPPOutputPPCallbacks *Callbacks; + + UnknownPragmaHandler(const char *prefix, PrintPPOutputPPCallbacks *callbacks) + : PragmaHandler(0), Prefix(prefix), Callbacks(callbacks) {} + virtual void HandlePragma(Preprocessor &PP, LexerToken &PragmaTok) { + // Figure out what line we went to and insert the appropriate number of + // newline characters. + Callbacks->MoveToLine(PragmaTok.getLocation()); + OutputString(Prefix, strlen(Prefix)); + + // Read and print all of the pragma tokens. 
+ while (PragmaTok.getKind() != tok::eom) { + if (PragmaTok.hasLeadingSpace()) + OutputChar(' '); + std::string TokSpell = PP.getSpelling(PragmaTok); + OutputString(&TokSpell[0], TokSpell.size()); + PP.LexUnexpandedToken(PragmaTok); + } + OutputChar('\n'); + } +}; +} // end anonymous namespace + +/// AvoidConcat - If printing PrevTok immediately followed by Tok would cause +/// the two individual tokens to be lexed as a single token, return true (which +/// causes a space to be printed between them). This allows the output of -E +/// mode to be lexed to the same token stream as lexing the input directly +/// would. +/// +/// This code must conservatively return true if it doesn't want to be 100% +/// accurate. This will cause the output to include extra space characters, but +/// the resulting output won't have incorrect concatenations going on. Examples +/// include "..", which we print with a space between, because we don't want to +/// track enough to tell "x.." from "...". +bool PrintPPOutputPPCallbacks::AvoidConcat(const LexerToken &PrevTok, + const LexerToken &Tok) { + char Buffer[256]; + + // If we haven't emitted a token on this line yet, PrevTok isn't useful to + // look at and no concatenation could happen anyway. + if (!EmittedTokensOnThisLine) + return false; + + // Basic algorithm: we look at the first character of the second token, and + // determine whether it, if appended to the first token, would form (or would + // contribute) to a larger token if concatenated. + char FirstChar; + if (IdentifierInfo *II = Tok.getIdentifierInfo()) { + // Avoid spelling identifiers, the most common form of token. + FirstChar = II->getName()[0]; + } else if (Tok.getLength() < 256) { + const char *TokPtr = Buffer; + PP.getSpelling(Tok, TokPtr); + FirstChar = TokPtr[0]; + } else { + FirstChar = PP.getSpelling(Tok)[0]; + } + + tok::TokenKind PrevKind = PrevTok.getKind(); + if (PrevTok.getIdentifierInfo()) // Language keyword or named operator. 
+ PrevKind = tok::identifier; + + switch (PrevKind) { + default: return false; + case tok::identifier: // id+id or id+number or id+L"foo". + return isalnum(FirstChar) || FirstChar == '_'; + case tok::numeric_constant: + return isalnum(FirstChar) || Tok.getKind() == tok::numeric_constant || + FirstChar == '+' || FirstChar == '-' || FirstChar == '.'; + case tok::period: // ..., .*, .1234 + return FirstChar == '.' || FirstChar == '*' || isdigit(FirstChar); + case tok::amp: // &&, &= + return FirstChar == '&' || FirstChar == '='; + case tok::plus: // ++, += + return FirstChar == '+' || FirstChar == '='; + case tok::minus: // --, ->, -=, ->* + return FirstChar == '-' || FirstChar == '>' || FirstChar == '='; + case tok::slash: // /=, /*, // + return FirstChar == '=' || FirstChar == '*' || FirstChar == '/'; + case tok::less: // <<, <<=, <=, <?=, <?, <:, <% + return FirstChar == '<' || FirstChar == '?' || FirstChar == '=' || + FirstChar == ':' || FirstChar == '%'; + case tok::greater: // >>, >=, >>=, >?=, >?, ->* + return FirstChar == '>' || FirstChar == '?' || FirstChar == '=' || + FirstChar == '*'; + case tok::pipe: // ||, |= + return FirstChar == '|' || FirstChar == '='; + case tok::percent: // %=, %>, %: + return FirstChar == '=' || FirstChar == '>' || FirstChar == ':'; + case tok::colon: // ::, :> + return FirstChar == ':' || FirstChar == '>'; + case tok::hash: // ##, #@, %:%: + return FirstChar == '#' || FirstChar == '@' || FirstChar == '%'; + case tok::arrow: // ->* + return FirstChar == '*'; + + case tok::star: // *= + case tok::exclaim: // != + case tok::lessless: // <<= + case tok::greaterequal: // >>= + case tok::caret: // ^= + case tok::equal: // == + // Cases that concatenate only if the next char is =. + return FirstChar == '='; + } +} + +/// DoPrintPreprocessedInput - This implements -E mode. 
+/// +void clang::DoPrintPreprocessedInput(unsigned MainFileID, Preprocessor &PP, + const LangOptions &Options) { + // Inform the preprocessor whether we want it to retain comments or not, due + // to -C or -CC. + PP.SetCommentRetentionState(EnableCommentOutput, EnableMacroCommentOutput); + + InitOutputBuffer(); + + LexerToken Tok, PrevTok; + char Buffer[256]; + PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks(PP); + PP.setPPCallbacks(Callbacks); + + PP.AddPragmaHandler(0, new UnknownPragmaHandler("#pragma", Callbacks)); + PP.AddPragmaHandler("GCC", new UnknownPragmaHandler("#pragma GCC",Callbacks)); + + // After we have configured the preprocessor, enter the main file. + + // Start parsing the specified input file. + PP.EnterSourceFile(MainFileID, 0, true); + + do { + PrevTok = Tok; + PP.Lex(Tok); + + // If this token is at the start of a line, emit newlines if needed. + if (Tok.isAtStartOfLine()) { + Callbacks->HandleFirstTokOnLine(Tok); + } else if (Tok.hasLeadingSpace() || + // Don't print "-" next to "-", it would form "--". + Callbacks->AvoidConcat(PrevTok, Tok)) { + OutputChar(' '); + } + + if (Tok.getLength() < 256) { + const char *TokPtr = Buffer; + unsigned Len = PP.getSpelling(Tok, TokPtr); + OutputString(TokPtr, Len); + } else { + std::string S = PP.getSpelling(Tok); + OutputString(&S[0], S.size()); + } + Callbacks->SetEmittedTokensOnThisLine(); + } while (Tok.getKind() != tok::eof); + OutputChar('\n'); + + CleanupOutputBuffer(); +} + diff --git a/Driver/Targets.cpp b/Driver/Targets.cpp new file mode 100644 index 0000000000..168084dcd7 --- /dev/null +++ b/Driver/Targets.cpp @@ -0,0 +1,443 @@ +//===--- Targets.cpp - Implement -arch option and targets -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. 
//
//===----------------------------------------------------------------------===//
//
// This file implements the -arch command line option and creates a TargetInfo
// that represents them.
//
//===----------------------------------------------------------------------===//

#include "clang.h"
#include "clang/AST/Builtins.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/TargetInfo.h"
#include "llvm/Support/CommandLine.h"
using namespace clang;

/// Note: a hard coded list of targets is clearly silly, these should be
/// dynamically registered and loadable with "-load".
enum SupportedTargets {
  target_ppc, target_ppc64,
  target_i386, target_x86_64,
  target_linux_i386
};

/// The "-arch" option.  It is a list, so it may be given several times; the
/// first entry becomes the primary target and the rest secondary targets.
static llvm::cl::list<SupportedTargets>
Archs("arch", llvm::cl::desc("Architectures to compile for"),
llvm::cl::values(clEnumValN(target_ppc, "ppc", "32-bit Darwin PowerPC"),
                 clEnumValN(target_ppc64, "ppc64", "64-bit Darwin PowerPC"),
                 clEnumValN(target_i386, "i386", "32-bit Darwin X86"),
                 clEnumValN(target_x86_64, "x86_64","64-bit Darwin X86"),
                 clEnumValN(target_linux_i386,"linux", "Linux i386"),
                 clEnumValEnd));

//===----------------------------------------------------------------------===//
// Common code shared among targets.
//===----------------------------------------------------------------------===//

namespace {
/// DarwinTargetInfo - Base class of the Darwin targets.  It appends the
/// defines that are common to every Darwin architecture.
class DarwinTargetInfo : public TargetInfoImpl {
public:
  virtual void getTargetDefines(std::vector<std::string> &Defines) const {
    Defines.push_back("__APPLE__=1");
    Defines.push_back("__MACH__=1");

    // Hard-coded for now: the garbage collection qualifiers expand to nothing
    // and the else branch is never taken.
    if (1) {// -fobjc-gc controls this.
      Defines.push_back("__weak=");
      Defines.push_back("__strong=");
    } else {
      Defines.push_back("__weak=__attribute__((objc_gc(weak)))");
      Defines.push_back("__strong=__attribute__((objc_gc(strong)))");
      Defines.push_back("__OBJC_GC__");
    }

    // darwin_constant_cfstrings controls this.
    Defines.push_back("__CONSTANT_CFSTRINGS__=1");

    // Hard-coded off for now: this define is never added.
    if (0) // darwin_pascal_strings
      Defines.push_back("__PASCAL_STRINGS__");
  }

};
} // end anonymous namespace.


/// getPowerPCDefines - Return a set of the PowerPC-specific #defines that are
/// not tied to a specific subtarget.  The entries are appended to Defines;
/// is64Bit selects between the 32-bit and 64-bit variants.
static void getPowerPCDefines(std::vector<std::string> &Defines, bool is64Bit) {
  // Target identification.
  // NOTE(review): "__ppc__" is added unconditionally here and again as
  // "__ppc__=1" in the 32-bit branch below, so it is also present for 64-bit.
  // Confirm that this is intended.
  Defines.push_back("__ppc__");
  Defines.push_back("_ARCH_PPC=1");
  Defines.push_back("__POWERPC__=1");
  if (is64Bit) {
    Defines.push_back("_ARCH_PPC64");
    Defines.push_back("_LP64");
    Defines.push_back("__LP64__");
    Defines.push_back("__ppc64__");
  } else {
    Defines.push_back("__ppc__=1");
  }

  // Target properties.
  Defines.push_back("_BIG_ENDIAN=1");
  Defines.push_back("__BIG_ENDIAN__=1");

  // Types and limits whose width depends on the pointer size.
  if (is64Bit) {
    Defines.push_back("__INTMAX_MAX__=9223372036854775807L");
    Defines.push_back("__INTMAX_TYPE__=long int");
    Defines.push_back("__LONG_MAX__=9223372036854775807L");
    Defines.push_back("__PTRDIFF_TYPE__=long int");
    Defines.push_back("__UINTMAX_TYPE__=long unsigned int");
  } else {
    Defines.push_back("__INTMAX_MAX__=9223372036854775807LL");
    Defines.push_back("__INTMAX_TYPE__=long long int");
    Defines.push_back("__LONG_MAX__=2147483647L");
    Defines.push_back("__PTRDIFF_TYPE__=int");
    Defines.push_back("__UINTMAX_TYPE__=long long unsigned int");
  }
  Defines.push_back("__INT_MAX__=2147483647");
  Defines.push_back("__LONG_LONG_MAX__=9223372036854775807LL");
  Defines.push_back("__CHAR_BIT__=8");
  Defines.push_back("__SCHAR_MAX__=127");
  Defines.push_back("__SHRT_MAX__=32767");
  Defines.push_back("__SIZE_TYPE__=long unsigned int");

  // Subtarget options.
  Defines.push_back("__USER_LABEL_PREFIX__=_");
  Defines.push_back("__NATURAL_ALIGNMENT__=1");
  Defines.push_back("__REGISTER_PREFIX__=");

  Defines.push_back("__WCHAR_MAX__=2147483647");
  Defines.push_back("__WCHAR_TYPE__=int");
  Defines.push_back("__WINT_TYPE__=int");

  // Float macros.
  Defines.push_back("__FLT_DENORM_MIN__=1.40129846e-45F");
  Defines.push_back("__FLT_DIG__=6");
  Defines.push_back("__FLT_EPSILON__=1.19209290e-7F");
  Defines.push_back("__FLT_EVAL_METHOD__=0");
  Defines.push_back("__FLT_HAS_INFINITY__=1");
  Defines.push_back("__FLT_HAS_QUIET_NAN__=1");
  Defines.push_back("__FLT_MANT_DIG__=24");
  Defines.push_back("__FLT_MAX_10_EXP__=38");
  Defines.push_back("__FLT_MAX_EXP__=128");
  Defines.push_back("__FLT_MAX__=3.40282347e+38F");
  Defines.push_back("__FLT_MIN_10_EXP__=(-37)");
  Defines.push_back("__FLT_MIN_EXP__=(-125)");
  Defines.push_back("__FLT_MIN__=1.17549435e-38F");
  Defines.push_back("__FLT_RADIX__=2");

  // double macros.
  Defines.push_back("__DBL_DENORM_MIN__=4.9406564584124654e-324");
  Defines.push_back("__DBL_DIG__=15");
  Defines.push_back("__DBL_EPSILON__=2.2204460492503131e-16");
  Defines.push_back("__DBL_HAS_INFINITY__=1");
  Defines.push_back("__DBL_HAS_QUIET_NAN__=1");
  Defines.push_back("__DBL_MANT_DIG__=53");
  Defines.push_back("__DBL_MAX_10_EXP__=308");
  Defines.push_back("__DBL_MAX_EXP__=1024");
  Defines.push_back("__DBL_MAX__=1.7976931348623157e+308");
  Defines.push_back("__DBL_MIN_10_EXP__=(-307)");
  Defines.push_back("__DBL_MIN_EXP__=(-1021)");
  Defines.push_back("__DBL_MIN__=2.2250738585072014e-308");
  Defines.push_back("__DECIMAL_DIG__=33");

  // 128-bit long double macros.
  Defines.push_back("__LDBL_DENORM_MIN__=4.940656458412465441765687"
                    "92868221e-324L");
  Defines.push_back("__LDBL_DIG__=31");
  Defines.push_back("__LDBL_EPSILON__=4.9406564584124654417656879286822"
                    "1e-324L");
  Defines.push_back("__LDBL_HAS_INFINITY__=1");
  Defines.push_back("__LDBL_HAS_QUIET_NAN__=1");
  Defines.push_back("__LDBL_MANT_DIG__=106");
  Defines.push_back("__LDBL_MAX_10_EXP__=308");
  Defines.push_back("__LDBL_MAX_EXP__=1024");
  Defines.push_back("__LDBL_MAX__=1.7976931348623158079372897140"
                    "5301e+308L");
  Defines.push_back("__LDBL_MIN_10_EXP__=(-291)");
  Defines.push_back("__LDBL_MIN_EXP__=(-968)");
  Defines.push_back("__LDBL_MIN__=2.004168360008972777996108051350"
                    "16e-292L");
  Defines.push_back("__LONG_DOUBLE_128__=1");

}

/// getX86Defines - Return a set of the X86-specific #defines that are
/// not tied to a specific subtarget.  The entries are appended to Defines;
/// is64Bit selects between the i386 and x86-64 variants.
static void getX86Defines(std::vector<std::string> &Defines, bool is64Bit) {
  // Target identification.
  if (is64Bit) {
    Defines.push_back("_LP64");
    Defines.push_back("__LP64__");
    Defines.push_back("__amd64__");
    Defines.push_back("__amd64");
    Defines.push_back("__x86_64");
    Defines.push_back("__x86_64__");
  } else {
    Defines.push_back("__i386__=1");
    Defines.push_back("__i386=1");
    Defines.push_back("i386=1");
  }

  // Target properties.
  Defines.push_back("__LITTLE_ENDIAN__=1");

  // Types and limits whose width depends on the pointer size.
  if (is64Bit) {
    Defines.push_back("__INTMAX_MAX__=9223372036854775807L");
    Defines.push_back("__INTMAX_TYPE__=long int");
    Defines.push_back("__LONG_MAX__=9223372036854775807L");
    Defines.push_back("__PTRDIFF_TYPE__=long int");
    Defines.push_back("__UINTMAX_TYPE__=long unsigned int");
  } else {
    Defines.push_back("__INTMAX_MAX__=9223372036854775807LL");
    Defines.push_back("__INTMAX_TYPE__=long long int");
    Defines.push_back("__LONG_MAX__=2147483647L");
    Defines.push_back("__PTRDIFF_TYPE__=int");
    Defines.push_back("__UINTMAX_TYPE__=long long unsigned int");
  }
  Defines.push_back("__CHAR_BIT__=8");
  Defines.push_back("__INT_MAX__=2147483647");
  Defines.push_back("__LONG_LONG_MAX__=9223372036854775807LL");
  Defines.push_back("__SCHAR_MAX__=127");
  Defines.push_back("__SHRT_MAX__=32767");
  Defines.push_back("__SIZE_TYPE__=long unsigned int");

  // Subtarget options.
  Defines.push_back("__nocona=1");
  Defines.push_back("__nocona__=1");
  Defines.push_back("__tune_nocona__=1");
  Defines.push_back("__SSE2_MATH__=1");
  Defines.push_back("__SSE2__=1");
  Defines.push_back("__SSE_MATH__=1");
  Defines.push_back("__SSE__=1");
  Defines.push_back("__MMX__=1");
  Defines.push_back("__REGISTER_PREFIX__=");

  Defines.push_back("__WCHAR_MAX__=2147483647");
  Defines.push_back("__WCHAR_TYPE__=int");
  Defines.push_back("__WINT_TYPE__=int");

  // Float macros.
  Defines.push_back("__FLT_DENORM_MIN__=1.40129846e-45F");
  Defines.push_back("__FLT_DIG__=6");
  Defines.push_back("__FLT_EPSILON__=1.19209290e-7F");
  Defines.push_back("__FLT_EVAL_METHOD__=0");
  Defines.push_back("__FLT_HAS_INFINITY__=1");
  Defines.push_back("__FLT_HAS_QUIET_NAN__=1");
  Defines.push_back("__FLT_MANT_DIG__=24");
  Defines.push_back("__FLT_MAX_10_EXP__=38");
  Defines.push_back("__FLT_MAX_EXP__=128");
  Defines.push_back("__FLT_MAX__=3.40282347e+38F");
  Defines.push_back("__FLT_MIN_10_EXP__=(-37)");
  Defines.push_back("__FLT_MIN_EXP__=(-125)");
  Defines.push_back("__FLT_MIN__=1.17549435e-38F");
  Defines.push_back("__FLT_RADIX__=2");

  // Double macros.
  Defines.push_back("__DBL_DENORM_MIN__=4.9406564584124654e-324");
  Defines.push_back("__DBL_DIG__=15");
  Defines.push_back("__DBL_EPSILON__=2.2204460492503131e-16");
  Defines.push_back("__DBL_HAS_INFINITY__=1");
  Defines.push_back("__DBL_HAS_QUIET_NAN__=1");
  Defines.push_back("__DBL_MANT_DIG__=53");
  Defines.push_back("__DBL_MAX_10_EXP__=308");
  Defines.push_back("__DBL_MAX_EXP__=1024");
  Defines.push_back("__DBL_MAX__=1.7976931348623157e+308");
  Defines.push_back("__DBL_MIN_10_EXP__=(-307)");
  Defines.push_back("__DBL_MIN_EXP__=(-1021)");
  Defines.push_back("__DBL_MIN__=2.2250738585072014e-308");
  Defines.push_back("__DECIMAL_DIG__=21");

  // 80-bit Long double macros.
  Defines.push_back("__LDBL_DENORM_MIN__=3.64519953188247460253e-4951L");
  Defines.push_back("__LDBL_DIG__=18");
  Defines.push_back("__LDBL_EPSILON__=1.08420217248550443401e-19L");
  Defines.push_back("__LDBL_HAS_INFINITY__=1");
  Defines.push_back("__LDBL_HAS_QUIET_NAN__=1");
  Defines.push_back("__LDBL_MANT_DIG__=64");
  Defines.push_back("__LDBL_MAX_10_EXP__=4932");
  Defines.push_back("__LDBL_MAX_EXP__=16384");
  Defines.push_back("__LDBL_MAX__=1.18973149535723176502e+4932L");
  Defines.push_back("__LDBL_MIN_10_EXP__=(-4931)");
  Defines.push_back("__LDBL_MIN_EXP__=(-16381)");
  Defines.push_back("__LDBL_MIN__=3.36210314311209350626e-4932L");

}

/// PPC builtin info.
namespace PPC {
  // One enumerator per entry of PPCBuiltins.def.  LastTIBuiltin makes the
  // first generated enumerator equal Builtin::FirstTSBuiltin, so
  // LastTSBuiltin-Builtin::FirstTSBuiltin is the number of entries.
  enum {
    LastTIBuiltin = Builtin::FirstTSBuiltin-1,
#define BUILTIN(ID, TYPE, ATTRS) BI##ID,
#include "PPCBuiltins.def"
    LastTSBuiltin
  };

  // Record table generated from the same .def file, in the same order.
  // NOTE(review): BUILTIN is redefined here without an #undef in view;
  // presumably the .def file #undefs it - confirm.
  static const Builtin::Info BuiltinInfo[] = {
#define BUILTIN(ID, TYPE, ATTRS) { #ID, TYPE, ATTRS },
#include "PPCBuiltins.def"
  };

  /// getBuiltins - Set Records to the PPC builtin table and NumRecords to the
  /// number of entries in it.
  static void getBuiltins(const Builtin::Info *&Records, unsigned &NumRecords) {
    Records = BuiltinInfo;
    NumRecords = LastTSBuiltin-Builtin::FirstTSBuiltin;
  }
} // End namespace PPC


/// X86 builtin info.
namespace X86 {
  // Same scheme as the PPC namespace, driven by X86Builtins.def.
  enum {
    LastTIBuiltin = Builtin::FirstTSBuiltin-1,
#define BUILTIN(ID, TYPE, ATTRS) BI##ID,
#include "X86Builtins.def"
    LastTSBuiltin
  };

  static const Builtin::Info BuiltinInfo[] = {
#define BUILTIN(ID, TYPE, ATTRS) { #ID, TYPE, ATTRS },
#include "X86Builtins.def"
  };

  /// getBuiltins - Set Records to the X86 builtin table and NumRecords to the
  /// number of entries in it.
  static void getBuiltins(const Builtin::Info *&Records, unsigned &NumRecords) {
    Records = BuiltinInfo;
    NumRecords = LastTSBuiltin-Builtin::FirstTSBuiltin;
  }
} // End namespace X86

//===----------------------------------------------------------------------===//
// Specific target implementations.
+//===----------------------------------------------------------------------===// + + +namespace { +class DarwinPPCTargetInfo : public DarwinTargetInfo { +public: + virtual void getTargetDefines(std::vector<std::string> &Defines) const { + DarwinTargetInfo::getTargetDefines(Defines); + getPowerPCDefines(Defines, false); + } + virtual void getTargetBuiltins(const Builtin::Info *&Records, + unsigned &NumRecords) const { + PPC::getBuiltins(Records, NumRecords); + } +}; +} // end anonymous namespace. + +namespace { +class DarwinPPC64TargetInfo : public DarwinTargetInfo { +public: + virtual void getTargetDefines(std::vector<std::string> &Defines) const { + DarwinTargetInfo::getTargetDefines(Defines); + getPowerPCDefines(Defines, true); + } + virtual void getTargetBuiltins(const Builtin::Info *&Records, + unsigned &NumRecords) const { + PPC::getBuiltins(Records, NumRecords); + } +}; +} // end anonymous namespace. + +namespace { +class DarwinI386TargetInfo : public DarwinTargetInfo { +public: + virtual void getTargetDefines(std::vector<std::string> &Defines) const { + DarwinTargetInfo::getTargetDefines(Defines); + getX86Defines(Defines, false); + } + virtual void getTargetBuiltins(const Builtin::Info *&Records, + unsigned &NumRecords) const { + X86::getBuiltins(Records, NumRecords); + } +}; +} // end anonymous namespace. + +namespace { +class DarwinX86_64TargetInfo : public DarwinTargetInfo { +public: + virtual void getTargetDefines(std::vector<std::string> &Defines) const { + DarwinTargetInfo::getTargetDefines(Defines); + getX86Defines(Defines, true); + } + virtual void getTargetBuiltins(const Builtin::Info *&Records, + unsigned &NumRecords) const { + X86::getBuiltins(Records, NumRecords); + } +}; +} // end anonymous namespace. + +namespace { +class LinuxTargetInfo : public DarwinTargetInfo { +public: + LinuxTargetInfo() { + // Note: I have no idea if this is right, just for testing. 
+ WCharWidth = 16; + } + + virtual void getTargetDefines(std::vector<std::string> &Defines) const { + // TODO: linux-specific stuff. + getX86Defines(Defines, false); + } + virtual void getTargetBuiltins(const Builtin::Info *&Records, + unsigned &NumRecords) const { + X86::getBuiltins(Records, NumRecords); + } +}; +} // end anonymous namespace. + + +//===----------------------------------------------------------------------===// +// Driver code +//===----------------------------------------------------------------------===// + +/// CreateTarget - Create the TargetInfoImpl object for the specified target +/// enum value. +static TargetInfoImpl *CreateTarget(SupportedTargets T) { + switch (T) { + default: assert(0 && "Unknown target!"); + case target_ppc: return new DarwinPPCTargetInfo(); + case target_ppc64: return new DarwinPPC64TargetInfo(); + case target_i386: return new DarwinI386TargetInfo(); + case target_x86_64: return new DarwinX86_64TargetInfo(); + case target_linux_i386: return new LinuxTargetInfo(); + } +} + +/// CreateTargetInfo - Return the set of target info objects as specified by +/// the -arch command line option. +TargetInfo *clang::CreateTargetInfo(Diagnostic &Diags) { + // If the user didn't specify at least one architecture, auto-sense the + // current host. TODO: This is a hack. :) + if (Archs.empty()) { +#ifndef __APPLE__ + // Assume non-apple = linux. + Archs.push_back(target_linux_i386); +#elif (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \ + defined(__ppc64__) + Archs.push_back(target_ppc64); +#elif defined(__POWERPC__) || defined (__ppc__) || defined(_POWER) + Archs.push_back(target_ppc); +#elif defined(__x86_64__) + Archs.push_back(target_x86_64); +#elif defined(__i386__) || defined(i386) || defined(_M_IX86) + Archs.push_back(target_i386); +#else + // Don't know what this is! + return 0; +#endif + } + + // Create the primary target and target info. 
+ TargetInfo *TI = new TargetInfo(CreateTarget(Archs[0]), &Diags); + + // Add all secondary targets. + for (unsigned i = 1, e = Archs.size(); i != e; ++i) + TI->AddSecondaryTarget(CreateTarget(Archs[i])); + return TI; +} diff --git a/Driver/TextDiagnosticBuffer.cpp b/Driver/TextDiagnosticBuffer.cpp new file mode 100644 index 0000000000..1f6075ea92 --- /dev/null +++ b/Driver/TextDiagnosticBuffer.cpp @@ -0,0 +1,38 @@ +//===--- TextDiagnosticBuffer.cpp - Buffer Text Diagnostics ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Bill Wendling and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is a concrete diagnostic client, which buffers the diagnostic messages. +// +//===----------------------------------------------------------------------===// + +#include "TextDiagnosticBuffer.h" +#include "clang/Basic/SourceManager.h" +using namespace clang; + +/// HandleDiagnostic - Store the errors & warnings that are reported. 
+/// +void TextDiagnosticBuffer::HandleDiagnostic(Diagnostic::Level Level, + SourceLocation Pos, + diag::kind ID, + const std::string *Strs, + unsigned NumStrs, + const SourceRange *, + unsigned) { + switch (Level) { + default: assert(0 && "Diagnostic not handled during diagnostic buffering!"); + case Diagnostic::Warning: + Warnings.push_back(std::make_pair(Pos, FormatDiagnostic(Level, ID, Strs, + NumStrs))); + break; + case Diagnostic::Error: + Errors.push_back(std::make_pair(Pos, FormatDiagnostic(Level, ID, Strs, + NumStrs))); + break; + } +} diff --git a/Driver/TextDiagnosticBuffer.h b/Driver/TextDiagnosticBuffer.h new file mode 100644 index 0000000000..34fbc6e27a --- /dev/null +++ b/Driver/TextDiagnosticBuffer.h @@ -0,0 +1,51 @@ +//===--- TextDiagnosticBuffer.h - Buffer Text Diagnostics -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Bill Wendling and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is a concrete diagnostic client, which buffers the diagnostic messages. 
//
//===----------------------------------------------------------------------===//

#ifndef DRIVER_TEXT_DIAGNOSTIC_BUFFER_H_
#define DRIVER_TEXT_DIAGNOSTIC_BUFFER_H_

#include "TextDiagnostics.h"
#include <vector>

namespace clang {

class Preprocessor;
class SourceManager;

/// TextDiagnosticBuffer - Diagnostic client that stores diagnostics instead
/// of printing them.  Errors and warnings are kept in two separate lists of
/// (location, message) pairs that can be walked with the iterators below.
class TextDiagnosticBuffer : public TextDiagnostics {
public:
  /// A list of diagnostics, each a source location paired with its message.
  typedef std::vector<std::pair<SourceLocation, std::string> > DiagList;
  typedef DiagList::iterator iterator;
  typedef DiagList::const_iterator const_iterator;
private:
  DiagList Errors, Warnings;
public:
  TextDiagnosticBuffer(SourceManager &SM) : TextDiagnostics(SM) {}

  // Read-only iteration over the buffered errors.
  const_iterator err_begin() const { return Errors.begin(); }
  const_iterator err_end() const { return Errors.end(); }

  // Read-only iteration over the buffered warnings.
  const_iterator warn_begin() const { return Warnings.begin(); }
  const_iterator warn_end() const { return Warnings.end(); }

  /// HandleDiagnostic - Record one reported diagnostic.
  virtual void HandleDiagnostic(Diagnostic::Level DiagLevel,
                                SourceLocation Pos,
                                diag::kind ID, const std::string *Strs,
                                unsigned NumStrs,
                                const SourceRange *Ranges,
                                unsigned NumRanges);
};

} // end namespace clang

#endif
diff --git a/Driver/TextDiagnosticPrinter.cpp b/Driver/TextDiagnosticPrinter.cpp new file mode 100644 index 0000000000..1acd210898 --- /dev/null +++ b/Driver/TextDiagnosticPrinter.cpp @@ -0,0 +1,225 @@
//===--- TextDiagnosticPrinter.cpp - Diagnostic Printer -------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file was developed by Bill Wendling and is distributed under the
// University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This diagnostic client prints out their diagnostic messages.
+// +//===----------------------------------------------------------------------===// + +#include "TextDiagnosticPrinter.h" +#include "clang/Basic/FileManager.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Lex/HeaderSearch.h" +#include "clang/Lex/Lexer.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/MemoryBuffer.h" +#include <iostream> +#include <string> +using namespace clang; + +static llvm::cl::opt<bool> +NoShowColumn("fno-show-column", + llvm::cl::desc("Do not include column number on diagnostics")); +static llvm::cl::opt<bool> +NoCaretDiagnostics("fno-caret-diagnostics", + llvm::cl::desc("Do not include source line and caret with" + " diagnostics")); + +void TextDiagnosticPrinter:: +PrintIncludeStack(SourceLocation Pos) { + unsigned FileID = Pos.getFileID(); + if (FileID == 0) return; + + // Print out the other include frames first. + PrintIncludeStack(SourceMgr.getIncludeLoc(FileID)); + + unsigned LineNo = SourceMgr.getLineNumber(Pos); + + const llvm::MemoryBuffer *Buffer = SourceMgr.getBuffer(FileID); + std::cerr << "In file included from " << Buffer->getBufferIdentifier() + << ":" << LineNo << ":\n"; +} + +/// HighlightRange - Given a SourceRange and a line number, highlight (with ~'s) +/// any characters in LineNo that intersect the SourceRange. +void TextDiagnosticPrinter::HighlightRange(const SourceRange &R, + unsigned LineNo, + std::string &CaratLine, + const std::string &SourceLine) { + assert(CaratLine.size() == SourceLine.size() && + "Expect a correspondence between source and carat line!"); + if (!R.isValid()) return; + + unsigned StartLineNo = SourceMgr.getLineNumber(R.Begin()); + if (StartLineNo > LineNo) return; // No intersection. + + unsigned EndLineNo = SourceMgr.getLineNumber(R.End()); + if (EndLineNo < LineNo) return; // No intersection. + + // Compute the column number of the start. 
+  unsigned StartColNo = 0;
+  if (StartLineNo == LineNo) {
+    StartColNo = SourceMgr.getColumnNumber(R.Begin());
+    if (StartColNo) --StartColNo;  // Zero base the col #.
+  }
+
+  // Pick the first non-whitespace column.
+  while (StartColNo < SourceLine.size() &&
+         (SourceLine[StartColNo] == ' ' || SourceLine[StartColNo] == '\t'))
+    ++StartColNo;
+
+  // Compute the column number of the end.
+  unsigned EndColNo = CaratLine.size();
+  if (EndLineNo == LineNo) {
+    EndColNo = SourceMgr.getColumnNumber(R.End());
+    if (EndColNo) {
+      --EndColNo;  // Zero base the col #.
+
+      // Add in the length of the token, so that we cover multi-char tokens.
+      EndColNo += GetTokenLength(R.End());
+    } else {
+      EndColNo = CaratLine.size();
+    }
+  }
+
+  // Pick the last non-whitespace column.
+  while (EndColNo-1 &&
+         (SourceLine[EndColNo-1] == ' ' || SourceLine[EndColNo-1] == '\t'))
+    --EndColNo;
+
+  // Fill the range with ~'s.
+  assert(StartColNo <= EndColNo && "Invalid range!");
+  for (unsigned i = StartColNo; i != EndColNo; ++i)
+    CaratLine[i] = '~';
+}
+
+/// GetTokenLength - Given the source location of a token, determine its length.
+/// This is a fully general function that uses a lexer to relex the token.
+///
+/// A raw lexer is created over the buffer containing Loc, positioned at the
+/// character data for Loc, and asked for exactly one token; that token's
+/// length is returned.  ThePreprocessor is dereferenced to build the lexer, so
+/// setPreprocessor() must have been called on this client beforehand.
+unsigned TextDiagnosticPrinter::GetTokenLength(SourceLocation Loc) {
+  const char *StrData =
+    SourceMgr.getCharacterData(SourceMgr.getLogicalLoc(Loc));
+
+  // Note, this could be special cased for common tokens like identifiers, ')',
+  // etc to make this faster, if it mattered.
+
+  // NOTE(review): the file ID comes from Loc itself while StrData above is
+  // looked up through the logical location -- confirm both always refer to the
+  // same buffer.
+  unsigned FileID = Loc.getFileID();
+
+  // Create a lexer starting at the beginning of this token.
+  Lexer TheLexer(SourceMgr.getBuffer(FileID), FileID,
+                 *ThePreprocessor, StrData);
+
+  LexerToken TheTok;
+  TheLexer.LexRawToken(TheTok);
+
+  return TheTok.getLength();
+}
+
+/// HandleDiagnostic - Print one diagnostic to std::cerr.
+///
+/// When Pos is valid the message is preceded by any pending "included from"
+/// lines and by a "<buffer name>:<line>:[<column>:] " prefix (the column is
+/// omitted when NoShowColumn is set).  Next come the severity tag for Level and
+/// the text produced by FormatDiagnostic(Level, ID, Strs, NumStrs).  Finally,
+/// unless NoCaretDiagnostics is set or Pos is invalid, the source line is
+/// echoed followed by a marker line in which every range in Ranges is drawn
+/// with '~' and the diagnostic position with '^'.
+void TextDiagnosticPrinter::HandleDiagnostic(Diagnostic::Level Level,
+                                             SourceLocation Pos,
+                                             diag::kind ID,
+                                             const std::string *Strs,
+                                             unsigned NumStrs,
+                                             const SourceRange *Ranges,
+                                             unsigned NumRanges) {
+  unsigned LineNo = 0, FilePos = 0, FileID = 0, ColNo = 0;
+  // LineStart/LineEnd are offsets into Buffer delimiting the line containing
+  // Pos; they are only meaningful when Pos is valid.
+  unsigned LineStart = 0, LineEnd = 0;
+  const llvm::MemoryBuffer *Buffer = 0;
+
+  if (Pos.isValid()) {
+    LineNo = SourceMgr.getLineNumber(Pos);
+    FileID = SourceMgr.getLogicalLoc(Pos).getFileID();
+
+    // First, if this diagnostic is not in the main file, print out the
+    // "included from" lines.
+    // The stack is printed only when the include location differs from the one
+    // remembered from the previous diagnostic.
+    if (LastWarningLoc != SourceMgr.getIncludeLoc(FileID)) {
+      LastWarningLoc = SourceMgr.getIncludeLoc(FileID);
+      PrintIncludeStack(LastWarningLoc);
+    }
+
+    // Compute the column number. Rewind from the current position to the start
+    // of the line.
+    ColNo = SourceMgr.getColumnNumber(Pos);
+    FilePos = SourceMgr.getSourceFilePos(Pos);
+    LineStart = FilePos-ColNo+1;  // Column # is 1-based
+
+    // Compute the line end. Scan forward from the error position to the end of
+    // the line.
+    Buffer = SourceMgr.getBuffer(FileID);
+    const char *Buf = Buffer->getBufferStart();
+    const char *BufEnd = Buffer->getBufferEnd();
+    LineEnd = FilePos;
+    while (Buf+LineEnd != BufEnd &&
+           Buf[LineEnd] != '\n' && Buf[LineEnd] != '\r')
+      ++LineEnd;
+
+    std::cerr << Buffer->getBufferIdentifier()
+              << ":" << LineNo << ":";
+    if (ColNo && !NoShowColumn)
+      std::cerr << ColNo << ":";
+    std::cerr << " ";
+  }
+
+  switch (Level) {
+  default: assert(0 && "Unknown diagnostic type!");
+  case Diagnostic::Note:    std::cerr << "note: "; break;
+  case Diagnostic::Warning: std::cerr << "warning: "; break;
+  case Diagnostic::Error:   std::cerr << "error: "; break;
+  case Diagnostic::Fatal:   std::cerr << "fatal error: "; break;
+  case Diagnostic::Sorry:   std::cerr << "sorry, unimplemented: ";
+    break;
+  }
+
+  std::cerr << FormatDiagnostic(Level, ID, Strs, NumStrs) << "\n";
+
+  // Buffer, LineStart and LineEnd were filled in by the Pos.isValid() branch
+  // above, so they are usable whenever this condition holds.
+  if (!NoCaretDiagnostics && Pos.isValid()) {
+    // Get the line of the source file.
+    const char *Buf = Buffer->getBufferStart();
+    std::string SourceLine(Buf+LineStart, Buf+LineEnd);
+
+    // Create a line for the carat that is filled with spaces that is the same
+    // length as the line of source code.
+    std::string CaratLine(LineEnd-LineStart, ' ');
+
+    // Highlight all of the characters covered by Ranges with ~ characters.
+    for (unsigned i = 0; i != NumRanges; ++i)
+      HighlightRange(Ranges[i], LineNo, CaratLine, SourceLine);
+
+    // Next, insert the carat itself.
+    // ColNo is 1-based.  If it lies beyond the echoed text -- or is 0, in which
+    // case the unsigned subtraction wraps to a huge value -- the carat is
+    // appended to the end of the line instead.
+    if (ColNo-1 < CaratLine.size())
+      CaratLine[ColNo-1] = '^';
+    else
+      CaratLine.push_back('^');
+
+    // Scan the source line, looking for tabs. If we find any, manually expand
+    // them to 8 characters and update the CaratLine to match.
+    for (unsigned i = 0; i != SourceLine.size(); ++i) {
+      if (SourceLine[i] != '\t') continue;
+
+      // Replace this tab with at least one space.
+      SourceLine[i] = ' ';
+
+      // Compute the number of spaces we need to insert.
+      // (i+8)&~7 is the first multiple of 8 after index i; one of the columns
+      // up to it is already occupied by the space written just above.
+      unsigned NumSpaces = ((i+8)&~7) - (i+1);
+      assert(NumSpaces < 8 && "Invalid computation of space amt");
+
+      // Insert spaces into the SourceLine.
+      SourceLine.insert(i+1, NumSpaces, ' ');
+
+      // Insert spaces or ~'s into CaratLine.
+      CaratLine.insert(i+1, NumSpaces, CaratLine[i] == '~' ? '~' : ' ');
+    }
+
+    // Finally, remove any blank spaces from the end of CaratLine.
+    // CaratLine always contains the '^' placed above, so this loop stops
+    // before it can run off the front of the string.
+    while (CaratLine[CaratLine.size()-1] == ' ')
+      CaratLine.erase(CaratLine.end()-1);
+
+    // Emit what we have computed.
+    std::cerr << SourceLine << "\n";
+    std::cerr << CaratLine << "\n";
+  }
+}
diff --git a/Driver/TextDiagnosticPrinter.h b/Driver/TextDiagnosticPrinter.h
new file mode 100644
index 0000000000..71e584ebf4
--- /dev/null
+++ b/Driver/TextDiagnosticPrinter.h
@@ -0,0 +1,46 @@
+//===--- TextDiagnosticPrinter.h - Text Diagnostic Client -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Bill Wendling and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a concrete diagnostic client, which prints the diagnostics to
+// standard error.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TEXT_DIAGNOSTIC_PRINTER_H_
+#define TEXT_DIAGNOSTIC_PRINTER_H_
+
+#include "TextDiagnostics.h"
+#include "clang/Basic/SourceLocation.h"
+
+namespace clang {
+class SourceManager;
+
+/// TextDiagnosticPrinter - The TextDiagnostics client that formats each
+/// diagnostic as text and writes it to standard error.
+class TextDiagnosticPrinter : public TextDiagnostics {
+  /// LastWarningLoc - The include location seen for the previous diagnostic.
+  /// HandleDiagnostic compares it with the include location of the current
+  /// diagnostic's file and prints the include stack only when they differ.
+  SourceLocation LastWarningLoc;
+public:
+  TextDiagnosticPrinter(SourceManager &sourceMgr)
+    : TextDiagnostics(sourceMgr) {}
+
+  /// PrintIncludeStack - Called by HandleDiagnostic to print the
+  /// "included from" lines for the given include location.
+  void PrintIncludeStack(SourceLocation Pos);
+
+  /// HighlightRange - Mark with '~' the columns of CaratLine that belong to
+  /// the part of range R lying on line LineNo.  SourceLine is the text of that
+  /// line; blanks and tabs at either end of the range are left unmarked.
+  void HighlightRange(const SourceRange &R, unsigned LineNo,
+                      std::string &CaratLine,
+                      const std::string &SourceLine);
+
+  /// GetTokenLength - Re-lex the token starting at Loc and return its length.
+  /// Uses the preprocessor registered through setPreprocessor().
+  unsigned GetTokenLength(SourceLocation Loc);
+
+  /// HandleDiagnostic - Print the diagnostic described by the arguments.
+  /// Strs/NumStrs are the strings substituted into the message text and
+  /// Ranges/NumRanges the source ranges to highlight.
+  virtual void HandleDiagnostic(Diagnostic::Level DiagLevel,
+                                SourceLocation Pos,
+                                diag::kind ID, const std::string *Strs,
+                                unsigned NumStrs,
+                                const SourceRange *Ranges,
+                                unsigned NumRanges);
+};
+
+} // end namespace clang
+
+#endif
diff --git a/Driver/TextDiagnostics.cpp b/Driver/TextDiagnostics.cpp
new file mode 100644
index 0000000000..4fc7e0c921
--- /dev/null
+++ b/Driver/TextDiagnostics.cpp
@@ -0,0 +1,60 @@
+//===--- TextDiagnostics.cpp - Text Diagnostics Parent Class --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Bill Wendling and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the parent class for all text diagnostics.
+//
+//===----------------------------------------------------------------------===//
+
+#include "TextDiagnostics.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Lex/HeaderSearch.h"
+#include <string>
+using namespace clang;
+
+TextDiagnostics:: ~TextDiagnostics() {}
+
+/// FormatDiagnostic - Return the description of diagnostic ID with every
+/// "%N" placeholder (N being a single digit 0-9) replaced by Strs[N].  A
+/// placeholder whose index is not below NumStrs is replaced by
+/// "<<<INTERNAL ERROR>>>".  Substituted text is inserted verbatim: it is never
+/// rescanned for further placeholders.  Level is currently unused.
+std::string TextDiagnostics::FormatDiagnostic(Diagnostic::Level Level,
+                                              diag::kind ID,
+                                              const std::string *Strs,
+                                              unsigned NumStrs) {
+  std::string Msg = Diagnostic::getDescription(ID);
+
+  // The bound is written as "i + 1 < size" rather than "i < size - 1" so that
+  // an empty description cannot wrap the unsigned subtraction and send the
+  // scan past the end of the string.
+  for (std::string::size_type i = 0; i + 1 < Msg.size(); ) {
+    // Compare against the digit range directly; passing a plain (possibly
+    // negative) char to isdigit is undefined.
+    const char Digit = Msg[i + 1];
+    if (Msg[i] != '%' || Digit < '0' || Digit > '9') {
+      ++i;
+      continue;
+    }
+
+    const unsigned StrNo = Digit - '0';
+    const std::string Arg =
+      StrNo < NumStrs ? Strs[StrNo] : std::string("<<<INTERNAL ERROR>>>");
+    Msg.replace(i, 2, Arg);
+
+    // Resume right after the inserted text.  This keeps a "%N" that happens to
+    // occur inside an argument from being expanded a second time, and it does
+    // not skip the character following the placeholder when Arg is empty.
+    i += Arg.size();
+  }
+
+  return Msg;
+}
+
+/// IgnoreDiagnostic - Return true if the diagnostic should be suppressed.
+/// That is the case for a warning or note whose physical location is in a file
+/// that the header search classifies as coming from a system header directory.
+/// TheHeaderSearch is dereferenced on that path, so setHeaderSearch() must have
+/// been called before such a diagnostic is reported.
+bool TextDiagnostics::IgnoreDiagnostic(Diagnostic::Level Level,
+                                       SourceLocation Pos) {
+  if (Pos.isValid()) {
+    // If this is a warning or note, and if it is in a system header, suppress
+    // the diagnostic.
+    if (Level == Diagnostic::Warning ||
+        Level == Diagnostic::Note) {
+      SourceLocation PhysLoc = SourceMgr.getPhysicalLoc(Pos);
+      const FileEntry *F = SourceMgr.getFileEntryForFileID(PhysLoc.getFileID());
+      if (F) {
+        DirectoryLookup::DirType DirInfo = TheHeaderSearch->getFileDirFlavor(F);
+        if (DirInfo == DirectoryLookup::SystemHeaderDir ||
+            DirInfo == DirectoryLookup::ExternCSystemHeaderDir)
+          return true;
+      }
+    }
+  }
+
+  return false;
+}
diff --git a/Driver/TextDiagnostics.h b/Driver/TextDiagnostics.h
new file mode 100644
index 0000000000..faf1b412b2
--- /dev/null
+++ b/Driver/TextDiagnostics.h
@@ -0,0 +1,53 @@
+//===--- TextDiagnostics.h - Text Diagnostics Checkers ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Bill Wendling and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the parent class for all text diagnostics.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TEXT_DIAGNOSTICS_H_
+#define TEXT_DIAGNOSTICS_H_
+
+#include "clang/Basic/Diagnostic.h"
+#include <string>
+
+namespace clang {
+class SourceManager;
+class HeaderSearch;
+class Preprocessor;
+
+/// TextDiagnostics - Common base for the diagnostic clients that render
+/// diagnostics as text.  It provides message formatting and system-header
+/// filtering; subclasses implement HandleDiagnostic to emit the result.
+class TextDiagnostics : public DiagnosticClient {
+  /// TheHeaderSearch - Used by IgnoreDiagnostic to classify the directory a
+  /// file was found in.  Null until setHeaderSearch() is called.
+  HeaderSearch *TheHeaderSearch;
+protected:
+  SourceManager &SourceMgr;
+
+  /// ThePreprocessor - Available to subclasses that need to re-lex source
+  /// text.  Null until setPreprocessor() is called.
+  Preprocessor *ThePreprocessor;
+
+  /// FormatDiagnostic - Return the description of diagnostic ID with its "%N"
+  /// placeholders replaced by the corresponding entries of Strs.
+  std::string FormatDiagnostic(Diagnostic::Level Level,
+                               diag::kind ID,
+                               const std::string *Strs,
+                               unsigned NumStrs);
+public:
+  // Both pointers start out null (in declaration order, alongside SourceMgr)
+  // so that a client used before its setters were called fails predictably
+  // instead of reading indeterminate pointer values.
+  TextDiagnostics(SourceManager &sourceMgr)
+    : TheHeaderSearch(0), SourceMgr(sourceMgr), ThePreprocessor(0) {}
+  virtual ~TextDiagnostics();
+
+  void setHeaderSearch(HeaderSearch &HS) { TheHeaderSearch = &HS; }
+  void setPreprocessor(Preprocessor &P) { ThePreprocessor = &P; }
+
+  /// IgnoreDiagnostic - Return true if the diagnostic should be dropped; the
+  /// implementation in this class drops warnings and notes located in system
+  /// headers.
+  virtual bool IgnoreDiagnostic(Diagnostic::Level Level,
+                                SourceLocation Pos);
+
+  /// HandleDiagnostic - Emit one diagnostic.  Strs/NumStrs are the strings to
+  /// substitute into the message and Ranges/NumRanges the source ranges
+  /// associated with it.
+  virtual void HandleDiagnostic(Diagnostic::Level DiagLevel,
+                                SourceLocation Pos,
+                                diag::kind ID, const std::string *Strs,
+                                unsigned NumStrs,
+                                const SourceRange *Ranges,
+                                unsigned NumRanges) = 0;
+};
+
+} // end namespace clang
+
+#endif
diff --git a/Driver/X86Builtins.def b/Driver/X86Builtins.def
new file mode 100644
index 0000000000..c4e3033a55
--- /dev/null
+++ b/Driver/X86Builtins.def
@@ -0,0 +1,420 @@
+//===--- X86Builtins.def - X86 Builtin function database --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the X86-specific builtin function database. Users of
+// this file must define the BUILTIN macro to make use of this information.
+// +//===----------------------------------------------------------------------===// + +// FIXME: this needs to be the full list supported by GCC. Right now, I'm just +// adding stuff on demand. + +// The format of this database matches clang/AST/Builtins.def. + +BUILTIN(__builtin_ia32_emms , "v", "") + +// FIXME: These types are incorrect. +// SSE intrinsics. +BUILTIN(__builtin_ia32_comieq, "v", "") +BUILTIN(__builtin_ia32_comilt, "v", "") +BUILTIN(__builtin_ia32_comile, "v", "") +BUILTIN(__builtin_ia32_comigt, "v", "") +BUILTIN(__builtin_ia32_comige, "v", "") +BUILTIN(__builtin_ia32_comineq, "v", "") +BUILTIN(__builtin_ia32_ucomieq, "v", "") +BUILTIN(__builtin_ia32_ucomilt, "v", "") +BUILTIN(__builtin_ia32_ucomile, "v", "") +BUILTIN(__builtin_ia32_ucomigt, "v", "") +BUILTIN(__builtin_ia32_ucomige, "v", "") +BUILTIN(__builtin_ia32_ucomineq, "v", "") +BUILTIN(__builtin_ia32_comisdeq, "v", "") +BUILTIN(__builtin_ia32_comisdlt, "v", "") +BUILTIN(__builtin_ia32_comisdle, "v", "") +BUILTIN(__builtin_ia32_comisdgt, "v", "") +BUILTIN(__builtin_ia32_comisdge, "v", "") +BUILTIN(__builtin_ia32_comisdneq, "v", "") +BUILTIN(__builtin_ia32_ucomisdeq, "v", "") +BUILTIN(__builtin_ia32_ucomisdlt, "v", "") +BUILTIN(__builtin_ia32_ucomisdle, "v", "") +BUILTIN(__builtin_ia32_ucomisdgt, "v", "") +BUILTIN(__builtin_ia32_ucomisdge, "v", "") +BUILTIN(__builtin_ia32_ucomisdneq, "v", "") +BUILTIN(__builtin_ia32_addps, "v", "") +BUILTIN(__builtin_ia32_subps, "v", "") +BUILTIN(__builtin_ia32_mulps, "v", "") +BUILTIN(__builtin_ia32_divps, "v", "") +BUILTIN(__builtin_ia32_addss, "v", "") +BUILTIN(__builtin_ia32_subss, "v", "") +BUILTIN(__builtin_ia32_mulss, "v", "") +BUILTIN(__builtin_ia32_divss, "v", "") +BUILTIN(__builtin_ia32_cmpeqps, "v", "") +BUILTIN(__builtin_ia32_cmpltps, "v", "") +BUILTIN(__builtin_ia32_cmpleps, "v", "") +BUILTIN(__builtin_ia32_cmpgtps, "v", "") +BUILTIN(__builtin_ia32_cmpgeps, "v", "") +BUILTIN(__builtin_ia32_cmpunordps, "v", "") +BUILTIN(__builtin_ia32_cmpneqps, 
"v", "") +BUILTIN(__builtin_ia32_cmpnltps, "v", "") +BUILTIN(__builtin_ia32_cmpnleps, "v", "") +BUILTIN(__builtin_ia32_cmpngtps, "v", "") +BUILTIN(__builtin_ia32_cmpngeps, "v", "") +BUILTIN(__builtin_ia32_cmpordps, "v", "") +BUILTIN(__builtin_ia32_cmpeqss, "v", "") +BUILTIN(__builtin_ia32_cmpltss, "v", "") +BUILTIN(__builtin_ia32_cmpless, "v", "") +BUILTIN(__builtin_ia32_cmpunordss, "v", "") +BUILTIN(__builtin_ia32_cmpneqss, "v", "") +BUILTIN(__builtin_ia32_cmpnltss, "v", "") +BUILTIN(__builtin_ia32_cmpnless, "v", "") +BUILTIN(__builtin_ia32_cmpngtss, "v", "") +BUILTIN(__builtin_ia32_cmpngess, "v", "") +BUILTIN(__builtin_ia32_cmpordss, "v", "") +BUILTIN(__builtin_ia32_minps, "v", "") +BUILTIN(__builtin_ia32_maxps, "v", "") +BUILTIN(__builtin_ia32_minss, "v", "") +BUILTIN(__builtin_ia32_maxss, "v", "") +BUILTIN(__builtin_ia32_andps, "v", "") +BUILTIN(__builtin_ia32_andnps, "v", "") +BUILTIN(__builtin_ia32_orps, "v", "") +BUILTIN(__builtin_ia32_xorps, "v", "") +BUILTIN(__builtin_ia32_movss, "v", "") +BUILTIN(__builtin_ia32_movhlps, "v", "") +BUILTIN(__builtin_ia32_movlhps, "v", "") +BUILTIN(__builtin_ia32_unpckhps, "v", "") +BUILTIN(__builtin_ia32_unpcklps, "v", "") +BUILTIN(__builtin_ia32_paddb, "v", "") +BUILTIN(__builtin_ia32_paddw, "v", "") +BUILTIN(__builtin_ia32_paddd, "v", "") +BUILTIN(__builtin_ia32_paddq, "v", "") +BUILTIN(__builtin_ia32_psubb, "v", "") +BUILTIN(__builtin_ia32_psubw, "v", "") +BUILTIN(__builtin_ia32_psubd, "v", "") +BUILTIN(__builtin_ia32_psubq, "v", "") +BUILTIN(__builtin_ia32_paddsb, "v", "") +BUILTIN(__builtin_ia32_paddsw, "v", "") +BUILTIN(__builtin_ia32_psubsb, "v", "") +BUILTIN(__builtin_ia32_psubsw, "v", "") +BUILTIN(__builtin_ia32_paddusb, "v", "") +BUILTIN(__builtin_ia32_paddusw, "v", "") +BUILTIN(__builtin_ia32_psubusb, "v", "") +BUILTIN(__builtin_ia32_psubusw, "v", "") +BUILTIN(__builtin_ia32_pmullw, "v", "") +BUILTIN(__builtin_ia32_pmulhw, "v", "") +BUILTIN(__builtin_ia32_pmulhuw, "v", "") +BUILTIN(__builtin_ia32_pand, "v", "") 
+BUILTIN(__builtin_ia32_pandn, "v", "") +BUILTIN(__builtin_ia32_por, "v", "") +BUILTIN(__builtin_ia32_pxor, "v", "") +BUILTIN(__builtin_ia32_pavgb, "v", "") +BUILTIN(__builtin_ia32_pavgw, "v", "") +BUILTIN(__builtin_ia32_pcmpeqb, "v", "") +BUILTIN(__builtin_ia32_pcmpeqw, "v", "") +BUILTIN(__builtin_ia32_pcmpeqd, "v", "") +BUILTIN(__builtin_ia32_pcmpgtb, "v", "") +BUILTIN(__builtin_ia32_pcmpgtw, "v", "") +BUILTIN(__builtin_ia32_pcmpgtd, "v", "") +BUILTIN(__builtin_ia32_pmaxub, "v", "") +BUILTIN(__builtin_ia32_pmaxsw, "v", "") +BUILTIN(__builtin_ia32_pminub, "v", "") +BUILTIN(__builtin_ia32_pminsw, "v", "") +BUILTIN(__builtin_ia32_punpckhbw, "v", "") +BUILTIN(__builtin_ia32_punpckhwd, "v", "") +BUILTIN(__builtin_ia32_punpckhdq, "v", "") +BUILTIN(__builtin_ia32_punpcklbw, "v", "") +BUILTIN(__builtin_ia32_punpcklwd, "v", "") +BUILTIN(__builtin_ia32_punpckldq, "v", "") +BUILTIN(__builtin_ia32_addpd, "v", "") +BUILTIN(__builtin_ia32_subpd, "v", "") +BUILTIN(__builtin_ia32_mulpd, "v", "") +BUILTIN(__builtin_ia32_divpd, "v", "") +BUILTIN(__builtin_ia32_addsd, "v", "") +BUILTIN(__builtin_ia32_subsd, "v", "") +BUILTIN(__builtin_ia32_mulsd, "v", "") +BUILTIN(__builtin_ia32_divsd, "v", "") +BUILTIN(__builtin_ia32_cmpeqpd, "v", "") +BUILTIN(__builtin_ia32_cmpltpd, "v", "") +BUILTIN(__builtin_ia32_cmplepd, "v", "") +BUILTIN(__builtin_ia32_cmpgtpd, "v", "") +BUILTIN(__builtin_ia32_cmpgepd, "v", "") +BUILTIN(__builtin_ia32_cmpunordpd, "v", "") +BUILTIN(__builtin_ia32_cmpneqpd, "v", "") +BUILTIN(__builtin_ia32_cmpnltpd, "v", "") +BUILTIN(__builtin_ia32_cmpnlepd, "v", "") +BUILTIN(__builtin_ia32_cmpngtpd, "v", "") +BUILTIN(__builtin_ia32_cmpngepd, "v", "") +BUILTIN(__builtin_ia32_cmpordpd, "v", "") +BUILTIN(__builtin_ia32_cmpeqsd, "v", "") +BUILTIN(__builtin_ia32_cmpltsd, "v", "") +BUILTIN(__builtin_ia32_cmplesd, "v", "") +BUILTIN(__builtin_ia32_cmpunordsd, "v", "") +BUILTIN(__builtin_ia32_cmpneqsd, "v", "") +BUILTIN(__builtin_ia32_cmpnltsd, "v", "") 
+BUILTIN(__builtin_ia32_cmpnlesd, "v", "") +BUILTIN(__builtin_ia32_cmpordsd, "v", "") +BUILTIN(__builtin_ia32_minpd, "v", "") +BUILTIN(__builtin_ia32_maxpd, "v", "") +BUILTIN(__builtin_ia32_minsd, "v", "") +BUILTIN(__builtin_ia32_maxsd, "v", "") +BUILTIN(__builtin_ia32_andpd, "v", "") +BUILTIN(__builtin_ia32_andnpd, "v", "") +BUILTIN(__builtin_ia32_orpd, "v", "") +BUILTIN(__builtin_ia32_xorpd, "v", "") +BUILTIN(__builtin_ia32_movsd, "v", "") +BUILTIN(__builtin_ia32_unpckhpd, "v", "") +BUILTIN(__builtin_ia32_unpcklpd, "v", "") +BUILTIN(__builtin_ia32_paddb128, "v", "") +BUILTIN(__builtin_ia32_paddw128, "v", "") +BUILTIN(__builtin_ia32_paddd128, "v", "") +BUILTIN(__builtin_ia32_paddq128, "v", "") +BUILTIN(__builtin_ia32_psubb128, "v", "") +BUILTIN(__builtin_ia32_psubw128, "v", "") +BUILTIN(__builtin_ia32_psubd128, "v", "") +BUILTIN(__builtin_ia32_psubq128, "v", "") +BUILTIN(__builtin_ia32_paddsb128, "v", "") +BUILTIN(__builtin_ia32_paddsw128, "v", "") +BUILTIN(__builtin_ia32_psubsb128, "v", "") +BUILTIN(__builtin_ia32_psubsw128, "v", "") +BUILTIN(__builtin_ia32_paddusb128, "v", "") +BUILTIN(__builtin_ia32_paddusw128, "v", "") +BUILTIN(__builtin_ia32_psubusb128, "v", "") +BUILTIN(__builtin_ia32_psubusw128, "v", "") +BUILTIN(__builtin_ia32_pmullw128, "v", "") +BUILTIN(__builtin_ia32_pmulhw128, "v", "") +BUILTIN(__builtin_ia32_pand128, "v", "") +BUILTIN(__builtin_ia32_pandn128, "v", "") +BUILTIN(__builtin_ia32_por128, "v", "") +BUILTIN(__builtin_ia32_pxor128, "v", "") +BUILTIN(__builtin_ia32_pavgb128, "v", "") +BUILTIN(__builtin_ia32_pavgw128, "v", "") +BUILTIN(__builtin_ia32_pcmpeqb128, "v", "") +BUILTIN(__builtin_ia32_pcmpeqw128, "v", "") +BUILTIN(__builtin_ia32_pcmpeqd128, "v", "") +BUILTIN(__builtin_ia32_pcmpgtb128, "v", "") +BUILTIN(__builtin_ia32_pcmpgtw128, "v", "") +BUILTIN(__builtin_ia32_pcmpgtd128, "v", "") +BUILTIN(__builtin_ia32_pmaxub128, "v", "") +BUILTIN(__builtin_ia32_pmaxsw128, "v", "") +BUILTIN(__builtin_ia32_pminub128, "v", "") 
+BUILTIN(__builtin_ia32_pminsw128, "v", "") +BUILTIN(__builtin_ia32_punpckhbw128, "v", "") +BUILTIN(__builtin_ia32_punpckhwd128, "v", "") +BUILTIN(__builtin_ia32_punpckhdq128, "v", "") +BUILTIN(__builtin_ia32_punpckhqdq128, "v", "") +BUILTIN(__builtin_ia32_punpcklbw128, "v", "") +BUILTIN(__builtin_ia32_punpcklwd128, "v", "") +BUILTIN(__builtin_ia32_punpckldq128, "v", "") +BUILTIN(__builtin_ia32_punpcklqdq128, "v", "") +BUILTIN(__builtin_ia32_packsswb128, "v", "") +BUILTIN(__builtin_ia32_packssdw128, "v", "") +BUILTIN(__builtin_ia32_packuswb128, "v", "") +BUILTIN(__builtin_ia32_pmulhuw128, "v", "") +BUILTIN(__builtin_ia32_addsubps, "v", "") +BUILTIN(__builtin_ia32_addsubpd, "v", "") +BUILTIN(__builtin_ia32_haddps, "v", "") +BUILTIN(__builtin_ia32_haddpd, "v", "") +BUILTIN(__builtin_ia32_hsubps, "v", "") +BUILTIN(__builtin_ia32_hsubpd, "v", "") +BUILTIN(__builtin_ia32_phaddw128, "v", "") +BUILTIN(__builtin_ia32_phaddw, "v", "") +BUILTIN(__builtin_ia32_phaddd128, "v", "") +BUILTIN(__builtin_ia32_phaddd, "v", "") +BUILTIN(__builtin_ia32_phaddsw128, "v", "") +BUILTIN(__builtin_ia32_phaddsw, "v", "") +BUILTIN(__builtin_ia32_phsubw128, "v", "") +BUILTIN(__builtin_ia32_phsubw, "v", "") +BUILTIN(__builtin_ia32_phsubd128, "v", "") +BUILTIN(__builtin_ia32_phsubd, "v", "") +BUILTIN(__builtin_ia32_phsubsw128, "v", "") +BUILTIN(__builtin_ia32_phsubsw, "v", "") +BUILTIN(__builtin_ia32_pmaddubsw128, "v", "") +BUILTIN(__builtin_ia32_pmaddubsw, "v", "") +BUILTIN(__builtin_ia32_pmulhrsw128, "v", "") +BUILTIN(__builtin_ia32_pmulhrsw, "v", "") +BUILTIN(__builtin_ia32_pshufb128, "v", "") +BUILTIN(__builtin_ia32_pshufb, "v", "") +BUILTIN(__builtin_ia32_psignb128, "v", "") +BUILTIN(__builtin_ia32_psignb, "v", "") +BUILTIN(__builtin_ia32_psignw128, "v", "") +BUILTIN(__builtin_ia32_psignw, "v", "") +BUILTIN(__builtin_ia32_psignd128, "v", "") +BUILTIN(__builtin_ia32_psignd, "v", "") +BUILTIN(__builtin_ia32_pabsb128, "v", "") +BUILTIN(__builtin_ia32_pabsb, "v", "") 
+BUILTIN(__builtin_ia32_pabsw128, "v", "") +BUILTIN(__builtin_ia32_pabsw, "v", "") +BUILTIN(__builtin_ia32_pabsd128, "v", "") +BUILTIN(__builtin_ia32_pabsd, "v", "") +BUILTIN(__builtin_ia32_psllw, "v", "") +BUILTIN(__builtin_ia32_pslld, "v", "") +BUILTIN(__builtin_ia32_psllq, "v", "") +BUILTIN(__builtin_ia32_psrlw, "v", "") +BUILTIN(__builtin_ia32_psrld, "v", "") +BUILTIN(__builtin_ia32_psrlq, "v", "") +BUILTIN(__builtin_ia32_psraw, "v", "") +BUILTIN(__builtin_ia32_psrad, "v", "") +BUILTIN(__builtin_ia32_pshufw, "v", "") +BUILTIN(__builtin_ia32_pmaddwd, "v", "") +BUILTIN(__builtin_ia32_packsswb, "v", "") +BUILTIN(__builtin_ia32_packssdw, "v", "") +BUILTIN(__builtin_ia32_packuswb, "v", "") +BUILTIN(__builtin_ia32_ldmxcsr, "v", "") +BUILTIN(__builtin_ia32_stmxcsr, "v", "") +BUILTIN(__builtin_ia32_cvtpi2ps, "v", "") +BUILTIN(__builtin_ia32_cvtps2pi, "v", "") +BUILTIN(__builtin_ia32_cvtsi2ss, "v", "") +BUILTIN(__builtin_ia32_cvtsi642ss, "v", "") +BUILTIN(__builtin_ia32_cvtss2si, "v", "") +BUILTIN(__builtin_ia32_cvtss2si64, "v", "") +BUILTIN(__builtin_ia32_cvttps2pi, "v", "") +BUILTIN(__builtin_ia32_cvttss2si, "v", "") +BUILTIN(__builtin_ia32_cvttss2si64, "v", "") +BUILTIN(__builtin_ia32_maskmovq, "v", "") +BUILTIN(__builtin_ia32_loadups, "v", "") +BUILTIN(__builtin_ia32_storeups, "v", "") +BUILTIN(__builtin_ia32_loadhps, "v", "") +BUILTIN(__builtin_ia32_loadlps, "v", "") +BUILTIN(__builtin_ia32_storehps, "v", "") +BUILTIN(__builtin_ia32_storelps, "v", "") +BUILTIN(__builtin_ia32_movmskps, "v", "") +BUILTIN(__builtin_ia32_pmovmskb, "v", "") +BUILTIN(__builtin_ia32_movntps, "v", "") +BUILTIN(__builtin_ia32_movntq, "v", "") +BUILTIN(__builtin_ia32_sfence, "v", "") +BUILTIN(__builtin_ia32_psadbw, "v", "") +BUILTIN(__builtin_ia32_rcpps, "v", "") +BUILTIN(__builtin_ia32_rcpss, "v", "") +BUILTIN(__builtin_ia32_rsqrtps, "v", "") +BUILTIN(__builtin_ia32_rsqrtss, "v", "") +BUILTIN(__builtin_ia32_sqrtps, "v", "") +BUILTIN(__builtin_ia32_sqrtss, "v", "") 
+BUILTIN(__builtin_ia32_shufps, "v", "") +BUILTIN(__builtin_ia32_femms, "v", "") +BUILTIN(__builtin_ia32_pavgusb, "v", "") +BUILTIN(__builtin_ia32_pf2id, "v", "") +BUILTIN(__builtin_ia32_pfacc, "v", "") +BUILTIN(__builtin_ia32_pfadd, "v", "") +BUILTIN(__builtin_ia32_pfcmpeq, "v", "") +BUILTIN(__builtin_ia32_pfcmpge, "v", "") +BUILTIN(__builtin_ia32_pfcmpgt, "v", "") +BUILTIN(__builtin_ia32_pfmax, "v", "") +BUILTIN(__builtin_ia32_pfmin, "v", "") +BUILTIN(__builtin_ia32_pfmul, "v", "") +BUILTIN(__builtin_ia32_pfrcp, "v", "") +BUILTIN(__builtin_ia32_pfrcpit1, "v", "") +BUILTIN(__builtin_ia32_pfrcpit2, "v", "") +BUILTIN(__builtin_ia32_pfrsqrt, "v", "") +BUILTIN(__builtin_ia32_pfrsqit1, "v", "") +BUILTIN(__builtin_ia32_pfsub, "v", "") +BUILTIN(__builtin_ia32_pfsubr, "v", "") +BUILTIN(__builtin_ia32_pi2fd, "v", "") +BUILTIN(__builtin_ia32_pmulhrw, "v", "") +BUILTIN(__builtin_ia32_pf2iw, "v", "") +BUILTIN(__builtin_ia32_pfnacc, "v", "") +BUILTIN(__builtin_ia32_pfpnacc, "v", "") +BUILTIN(__builtin_ia32_pi2fw, "v", "") +BUILTIN(__builtin_ia32_pswapdsf, "v", "") +BUILTIN(__builtin_ia32_pswapdsi, "v", "") +BUILTIN(__builtin_ia32_maskmovdqu, "v", "") +BUILTIN(__builtin_ia32_loadupd, "v", "") +BUILTIN(__builtin_ia32_storeupd, "v", "") +BUILTIN(__builtin_ia32_loadhpd, "v", "") +BUILTIN(__builtin_ia32_loadlpd, "v", "") +BUILTIN(__builtin_ia32_movmskpd, "v", "") +BUILTIN(__builtin_ia32_pmovmskb128, "v", "") +BUILTIN(__builtin_ia32_movnti, "v", "") +BUILTIN(__builtin_ia32_movntpd, "v", "") +BUILTIN(__builtin_ia32_movntdq, "v", "") +BUILTIN(__builtin_ia32_pshufd, "v", "") +BUILTIN(__builtin_ia32_pshuflw, "v", "") +BUILTIN(__builtin_ia32_pshufhw, "v", "") +BUILTIN(__builtin_ia32_psadbw128, "v", "") +BUILTIN(__builtin_ia32_sqrtpd, "v", "") +BUILTIN(__builtin_ia32_sqrtsd, "v", "") +BUILTIN(__builtin_ia32_shufpd, "v", "") +BUILTIN(__builtin_ia32_cvtdq2pd, "v", "") +BUILTIN(__builtin_ia32_cvtdq2ps, "v", "") +BUILTIN(__builtin_ia32_cvtpd2dq, "v", "") +BUILTIN(__builtin_ia32_cvtpd2pi, "v", 
"") +BUILTIN(__builtin_ia32_cvtpd2ps, "v", "") +BUILTIN(__builtin_ia32_cvttpd2dq, "v", "") +BUILTIN(__builtin_ia32_cvttpd2pi, "v", "") +BUILTIN(__builtin_ia32_cvtpi2pd, "v", "") +BUILTIN(__builtin_ia32_cvtsd2si, "v", "") +BUILTIN(__builtin_ia32_cvttsd2si, "v", "") +BUILTIN(__builtin_ia32_cvtsd2si64, "v", "") +BUILTIN(__builtin_ia32_cvttsd2si64, "v", "") +BUILTIN(__builtin_ia32_cvtps2dq, "v", "") +BUILTIN(__builtin_ia32_cvtps2pd, "v", "") +BUILTIN(__builtin_ia32_cvttps2dq, "v", "") +BUILTIN(__builtin_ia32_cvtsi2sd, "v", "") +BUILTIN(__builtin_ia32_cvtsi642sd, "v", "") +BUILTIN(__builtin_ia32_cvtsd2ss, "v", "") +BUILTIN(__builtin_ia32_cvtss2sd, "v", "") +BUILTIN(__builtin_ia32_clflush, "v", "") +BUILTIN(__builtin_ia32_lfence, "v", "") +BUILTIN(__builtin_ia32_mfence, "v", "") +BUILTIN(__builtin_ia32_loaddqu, "v", "") +BUILTIN(__builtin_ia32_storedqu, "v", "") +BUILTIN(__builtin_ia32_psllwi, "v", "") +BUILTIN(__builtin_ia32_pslldi, "v", "") +BUILTIN(__builtin_ia32_psllqi, "v", "") +BUILTIN(__builtin_ia32_psrawi, "v", "") +BUILTIN(__builtin_ia32_psradi, "v", "") +BUILTIN(__builtin_ia32_psrlwi, "v", "") +BUILTIN(__builtin_ia32_psrldi, "v", "") +BUILTIN(__builtin_ia32_psrlqi, "v", "") +BUILTIN(__builtin_ia32_pmuludq, "v", "") +BUILTIN(__builtin_ia32_pmuludq128, "v", "") +BUILTIN(__builtin_ia32_psllw128, "v", "") +BUILTIN(__builtin_ia32_pslld128, "v", "") +BUILTIN(__builtin_ia32_psllq128, "v", "") +BUILTIN(__builtin_ia32_psrlw128, "v", "") +BUILTIN(__builtin_ia32_psrld128, "v", "") +BUILTIN(__builtin_ia32_psrlq128, "v", "") +BUILTIN(__builtin_ia32_psraw128, "v", "") +BUILTIN(__builtin_ia32_psrad128, "v", "") +BUILTIN(__builtin_ia32_pslldqi128, "v", "") +BUILTIN(__builtin_ia32_psllwi128, "v", "") +BUILTIN(__builtin_ia32_pslldi128, "v", "") +BUILTIN(__builtin_ia32_psllqi128, "v", "") +BUILTIN(__builtin_ia32_psrldqi128, "v", "") +BUILTIN(__builtin_ia32_psrlwi128, "v", "") +BUILTIN(__builtin_ia32_psrldi128, "v", "") +BUILTIN(__builtin_ia32_psrlqi128, "v", "") 
+BUILTIN(__builtin_ia32_psrawi128, "v", "") +BUILTIN(__builtin_ia32_psradi128, "v", "") +BUILTIN(__builtin_ia32_pmaddwd128, "v", "") +BUILTIN(__builtin_ia32_monitor, "v", "") +BUILTIN(__builtin_ia32_mwait, "v", "") +BUILTIN(__builtin_ia32_movshdup, "v", "") +BUILTIN(__builtin_ia32_movsldup, "v", "") +BUILTIN(__builtin_ia32_lddqu, "v", "") +BUILTIN(__builtin_ia32_palignr128, "v", "") +BUILTIN(__builtin_ia32_palignr, "v", "") +BUILTIN(__builtin_ia32_vec_init_v2si, "v", "") +BUILTIN(__builtin_ia32_vec_init_v4hi, "v", "") +BUILTIN(__builtin_ia32_vec_init_v8qi, "v", "") +BUILTIN(__builtin_ia32_vec_ext_v2df, "v", "") +BUILTIN(__builtin_ia32_vec_ext_v2di, "v", "") +BUILTIN(__builtin_ia32_vec_ext_v4sf, "v", "") +BUILTIN(__builtin_ia32_vec_ext_v4si, "v", "") +BUILTIN(__builtin_ia32_vec_ext_v8hi, "v", "") +BUILTIN(__builtin_ia32_vec_ext_v4hi, "v", "") +BUILTIN(__builtin_ia32_vec_ext_v2si, "v", "") +BUILTIN(__builtin_ia32_vec_set_v8hi, "v", "") +BUILTIN(__builtin_ia32_vec_set_v4hi, "v", "") + +// Apple local SSE builtins? These are probably not needed eventually, but are +// in the apple-gcc xmmintrin.h file (rdar://4099020). +BUILTIN(__builtin_ia32_movqv4si, "v", "") +BUILTIN(__builtin_ia32_loadlv4si, "v", "") +BUILTIN(__builtin_ia32_storelv4si, "v", "") + + +#undef BUILTIN diff --git a/Driver/clang.cpp b/Driver/clang.cpp new file mode 100644 index 0000000000..43eaee68f6 --- /dev/null +++ b/Driver/clang.cpp @@ -0,0 +1,914 @@ +//===--- clang.cpp - C-Language Front-end ---------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This utility may be invoked in the following manner: +// clang --help - Output help info. +// clang [options] - Read from stdin. +// clang [options] file - Read from "file". 
+//   clang [options] file1 file2 - Read these files.
+//
+//===----------------------------------------------------------------------===//
+//
+// TODO: Options to support:
+//
+//   -ffatal-errors
+//   -ftabstop=width
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang.h"
+#include "ASTStreamers.h"
+#include "TextDiagnosticBuffer.h"
+#include "TextDiagnosticPrinter.h"
+#include "clang/Parse/Parser.h"
+#include "clang/Lex/HeaderSearch.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TargetInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/System/Signals.h"
+#include <memory>
+using namespace clang;
+
+//===----------------------------------------------------------------------===//
+// Global options.
+//===----------------------------------------------------------------------===//
+
+static llvm::cl::opt<bool>
+Verbose("v", llvm::cl::desc("Enable verbose output"));
+static llvm::cl::opt<bool>
+Stats("stats", llvm::cl::desc("Print performance metrics and statistics"));
+
+/// ProgActions - The mutually exclusive things the driver can be asked to do
+/// with its input; selected through the ProgAction option below.
+enum ProgActions {
+  EmitLLVM,                     // Emit a .ll file.
+  ParseASTPrint,                // Parse ASTs and print them.
+  ParseASTCheck,                // Parse ASTs and check diagnostics.
+  ParseAST,                     // Parse ASTs.
+  ParsePrintCallbacks,          // Parse and print each callback.
+  ParseSyntaxOnly,              // Parse and perform semantic analysis.
+  ParseNoop,                    // Parse with noop callbacks.
+  RunPreprocessorOnly,          // Just lex, no output.
+  PrintPreprocessedInput,       // -E mode.
+  DumpTokens                    // Token dump mode.
+};
+
+// The action defaults to ParseSyntaxOnly when none of the flags is given.
+static llvm::cl::opt<ProgActions>
+ProgAction(llvm::cl::desc("Choose output type:"), llvm::cl::ZeroOrMore,
+           llvm::cl::init(ParseSyntaxOnly),
+           llvm::cl::values(
+             clEnumValN(RunPreprocessorOnly, "Eonly",
+                        "Just run preprocessor, no output (for timings)"),
+             clEnumValN(PrintPreprocessedInput, "E",
+                        "Run preprocessor, emit preprocessed file"),
+             clEnumValN(DumpTokens, "dumptokens",
+                        "Run preprocessor, dump internal rep of tokens"),
+             clEnumValN(ParseNoop, "parse-noop",
+                        "Run parser with noop callbacks (for timings)"),
+             clEnumValN(ParseSyntaxOnly, "fsyntax-only",
+                        "Run parser and perform semantic analysis"),
+             clEnumValN(ParsePrintCallbacks, "parse-print-callbacks",
+                        "Run parser and print each callback invoked"),
+             clEnumValN(ParseAST, "parse-ast",
+                        "Run parser and build ASTs"),
+             clEnumValN(ParseASTPrint, "parse-ast-print",
+                        "Run parser, build ASTs, then print ASTs"),
+             clEnumValN(ParseASTCheck, "parse-ast-check",
+                        "Run parser, build ASTs, then check diagnostics"),
+             clEnumValN(EmitLLVM, "emit-llvm",
+                        "Build ASTs then convert to LLVM, emit .ll file"),
+             clEnumValEnd));
+
+//===----------------------------------------------------------------------===//
+// Language Options
+//===----------------------------------------------------------------------===//
+
+/// LangKind - The base language of an input.  Each "_cpp" variant names the
+/// already-preprocessed form of the language before it.
+enum LangKind {
+  langkind_unspecified,
+  langkind_c,
+  langkind_c_cpp,
+  langkind_cxx,
+  langkind_cxx_cpp,
+  langkind_objc,
+  langkind_objc_cpp,
+  langkind_objcxx,
+  langkind_objcxx_cpp
+};
+
+/* TODO: GCC also accepts:
+   c-header c++-header objective-c-header objective-c++-header
+   assembler assembler-with-cpp
+   ada, f77*, ratfor (!), f95, java, treelang
+ */
+static llvm::cl::opt<LangKind>
+BaseLang("x", llvm::cl::desc("Base language to compile"),
+         llvm::cl::init(langkind_unspecified),
+   llvm::cl::values(clEnumValN(langkind_c,     "c",            "C"),
+                    clEnumValN(langkind_cxx,   "c++",          "C++"),
+                    clEnumValN(langkind_objc,  "objective-c",  "Objective C"),
+                    clEnumValN(langkind_objcxx,"objective-c++","Objective C++"),
+                    clEnumValN(langkind_c_cpp,     "c-cpp-output",
+                               "Preprocessed C"),
+                    clEnumValN(langkind_cxx_cpp,   "c++-cpp-output",
+                               "Preprocessed C++"),
+                    clEnumValN(langkind_objc_cpp,  "objective-c-cpp-output",
+                               "Preprocessed Objective C"),
+                    clEnumValN(langkind_objcxx_cpp,"objective-c++-cpp-output",
+                               "Preprocessed Objective C++"),
+                    clEnumValEnd));
+
+static llvm::cl::opt<bool>
+LangObjC("ObjC", llvm::cl::desc("Set base language to Objective-C"),
+         llvm::cl::Hidden);
+static llvm::cl::opt<bool>
+LangObjCXX("ObjC++", llvm::cl::desc("Set base language to Objective-C++"),
+           llvm::cl::Hidden);
+
+/// InitializeBaseLanguage - Handle the -x foo options or infer a base language
+/// from the input filename.
+///
+/// When -x was not given, the language is chosen in this order: -ObjC, then
+/// -ObjC++, then the text after the last '.' in Filename.  A name without a
+/// '.' or with an unrecognized extension is treated as C.  The result is
+/// stored back into BaseLang and the matching CPlusPlus/ObjC1/ObjC2 flags are
+/// set in Options.
+static void InitializeBaseLanguage(LangOptions &Options,
+                                   const std::string &Filename) {
+  if (BaseLang == langkind_unspecified) {
+    std::string::size_type DotPos = Filename.rfind('.');
+    if (LangObjC) {
+      BaseLang = langkind_objc;
+    } else if (LangObjCXX) {
+      BaseLang = langkind_objcxx;
+    } else if (DotPos == std::string::npos) {
+      BaseLang = langkind_c;  // Default to C if no extension.
+    } else {
+      std::string Ext = Filename.substr(DotPos+1);
+      // C header: .h
+      // C++ header: .hh or .H;
+      // assembler no preprocessing: .s
+      // assembler: .S
+      if (Ext == "c")
+        BaseLang = langkind_c;
+      else if (Ext == "i")
+        BaseLang = langkind_c_cpp;
+      else if (Ext == "ii")
+        BaseLang = langkind_cxx_cpp;
+      else if (Ext == "m")
+        BaseLang = langkind_objc;
+      else if (Ext == "mi")
+        BaseLang = langkind_objc_cpp;
+      else if (Ext == "mm" || Ext == "M")
+        BaseLang = langkind_objcxx;
+      else if (Ext == "mii")
+        BaseLang = langkind_objcxx_cpp;
+      else if (Ext == "C" || Ext == "cc" || Ext == "cpp" || Ext == "CPP" ||
+               Ext == "c++" || Ext == "cp" || Ext == "cxx")
+        BaseLang = langkind_cxx;
+      else
+        BaseLang = langkind_c;
+    }
+  }
+
+  // FIXME: implement -fpreprocessed mode.
+  // NoPreprocess is recorded for the "_cpp" kinds below but nothing reads it
+  // yet.
+  bool NoPreprocess = false;
+
+  switch (BaseLang) {
+  default: assert(0 && "Unknown language kind!");
+  case langkind_c_cpp:
+    NoPreprocess = true;
+    // FALLTHROUGH
+  case langkind_c:
+    break;
+  case langkind_cxx_cpp:
+    NoPreprocess = true;
+    // FALLTHROUGH
+  case langkind_cxx:
+    Options.CPlusPlus = 1;
+    break;
+  case langkind_objc_cpp:
+    NoPreprocess = true;
+    // FALLTHROUGH
+  case langkind_objc:
+    Options.ObjC1 = Options.ObjC2 = 1;
+    break;
+  case langkind_objcxx_cpp:
+    NoPreprocess = true;
+    // FALLTHROUGH
+  case langkind_objcxx:
+    Options.ObjC1 = Options.ObjC2 = 1;
+    Options.CPlusPlus = 1;
+    break;
+  }
+}
+
+/// LangStds - Language standards we support.
+enum LangStds {
+  lang_unspecified,
+  lang_c89, lang_c94, lang_c99,
+  lang_gnu89, lang_gnu99,
+  lang_cxx98, lang_gnucxx98
+};
+
+// The "(default for ...)" notes in the help text must agree with the defaults
+// picked by InitializeLanguageStandard when -std is not given: gnu99 for C and
+// Objective-C inputs, gnu++98 for C++ and Objective-C++ inputs.
+static llvm::cl::opt<LangStds>
+LangStd("std", llvm::cl::desc("Language standard to compile for"),
+        llvm::cl::init(lang_unspecified),
+  llvm::cl::values(clEnumValN(lang_c89,      "c89",            "ISO C 1990"),
+                   clEnumValN(lang_c89,      "c90",            "ISO C 1990"),
+                   clEnumValN(lang_c89,      "iso9899:1990",   "ISO C 1990"),
+                   clEnumValN(lang_c94,      "iso9899:199409",
+                              "ISO C 1990 with amendment 1"),
+                   clEnumValN(lang_c99,      "c99",            "ISO C 1999"),
+//                 clEnumValN(lang_c99,      "c9x",            "ISO C 1999"),
+                   clEnumValN(lang_c99,      "iso9899:1999",   "ISO C 1999"),
+//                 clEnumValN(lang_c99,      "iso9899:199x",   "ISO C 1999"),
+                   clEnumValN(lang_gnu89,    "gnu89",
+                              "ISO C 1990 with GNU extensions"),
+                   clEnumValN(lang_gnu99,    "gnu99",
+                              "ISO C 1999 with GNU extensions (default for C)"),
+                   clEnumValN(lang_gnu99,    "gnu9x",
+                              "ISO C 1999 with GNU extensions"),
+                   clEnumValN(lang_cxx98,    "c++98",
+                              "ISO C++ 1998 with amendments"),
+                   clEnumValN(lang_gnucxx98, "gnu++98",
+                              "ISO C++ 1998 with amendments and GNU "
+                              "extensions (default for C++)"),
+                   clEnumValEnd));
+
+static llvm::cl::opt<bool>
+NoOperatorNames("fno-operator-names",
+                llvm::cl::desc("Do not treat C++ operator name keywords as "
+                               "synonyms for operators"));
+
+// FIXME: add:
+//   -ansi
+//
-trigraphs +// -fdollars-in-identifiers +static void InitializeLanguageStandard(LangOptions &Options) { + if (LangStd == lang_unspecified) { + // Based on the base language, pick one. + switch (BaseLang) { + default: assert(0 && "Unknown base language"); + case langkind_c: + case langkind_c_cpp: + case langkind_objc: + case langkind_objc_cpp: + LangStd = lang_gnu99; + break; + case langkind_cxx: + case langkind_cxx_cpp: + case langkind_objcxx: + case langkind_objcxx_cpp: + LangStd = lang_gnucxx98; + break; + } + } + + switch (LangStd) { + default: assert(0 && "Unknown language standard!"); + + // Fall through from newer standards to older ones. This isn't really right. + // FIXME: Enable specifically the right features based on the language stds. + case lang_gnucxx98: + case lang_cxx98: + Options.CPlusPlus = 1; + Options.CXXOperatorNames = !NoOperatorNames; + // FALL THROUGH. + case lang_gnu99: + case lang_c99: + Options.Digraphs = 1; + Options.C99 = 1; + Options.HexFloats = 1; + // FALL THROUGH. + case lang_gnu89: + Options.BCPLComment = 1; // Only for C99/C++. + // FALL THROUGH. + case lang_c94: + case lang_c89: + break; + } + + Options.Trigraphs = 1; // -trigraphs or -ansi + Options.DollarIdents = 1; // FIXME: Really a target property. 
+} + +//===----------------------------------------------------------------------===// +// Our DiagnosticClient implementation +//===----------------------------------------------------------------------===// + +// FIXME: Werror should take a list of things, -Werror=foo,bar +static llvm::cl::opt<bool> +WarningsAsErrors("Werror", llvm::cl::desc("Treat all warnings as errors")); + +static llvm::cl::opt<bool> +WarnOnExtensions("pedantic", llvm::cl::init(false), + llvm::cl::desc("Issue a warning on uses of GCC extensions")); + +static llvm::cl::opt<bool> +ErrorOnExtensions("pedantic-errors", + llvm::cl::desc("Issue an error on uses of GCC extensions")); + +static llvm::cl::opt<bool> +WarnUnusedMacros("Wunused_macros", + llvm::cl::desc("Warn for unused macros in the main translation unit")); + + +/// InitializeDiagnostics - Initialize the diagnostic object, based on the +/// current command line option settings. +static void InitializeDiagnostics(Diagnostic &Diags) { + Diags.setWarningsAsErrors(WarningsAsErrors); + Diags.setWarnOnExtensions(WarnOnExtensions); + Diags.setErrorOnExtensions(ErrorOnExtensions); + + // Silence the "macro is not used" warning unless requested. + if (!WarnUnusedMacros) + Diags.setDiagnosticMapping(diag::pp_macro_not_used, diag::MAP_IGNORE); +} + +//===----------------------------------------------------------------------===// +// Preprocessor Initialization +//===----------------------------------------------------------------------===// + +// FIXME: Preprocessor builtins to support. +// -A... - Play with #assertions +// -undef - Undefine all predefined macros + +static llvm::cl::list<std::string> +D_macros("D", llvm::cl::value_desc("macro"), llvm::cl::Prefix, + llvm::cl::desc("Predefine the specified macro")); +static llvm::cl::list<std::string> +U_macros("U", llvm::cl::value_desc("macro"), llvm::cl::Prefix, + llvm::cl::desc("Undefine the specified macro")); + +// Append a #define line to Buf for Macro. 
Macro should be of the form XXX, +// in which case we emit "#define XXX 1" or "XXX=Y z W" in which case we emit +// "#define XXX Y z W". To get a #define with no value, use "XXX=". +static void DefineBuiltinMacro(std::vector<char> &Buf, const char *Macro, + const char *Command = "#define ") { + Buf.insert(Buf.end(), Command, Command+strlen(Command)); + if (const char *Equal = strchr(Macro, '=')) { + // Turn the = into ' '. + Buf.insert(Buf.end(), Macro, Equal); + Buf.push_back(' '); + Buf.insert(Buf.end(), Equal+1, Equal+strlen(Equal)); + } else { + // Push "macroname 1". + Buf.insert(Buf.end(), Macro, Macro+strlen(Macro)); + Buf.push_back(' '); + Buf.push_back('1'); + } + Buf.push_back('\n'); +} + +static void InitializePredefinedMacros(Preprocessor &PP, + std::vector<char> &Buf) { + // FIXME: Implement magic like cpp_init_builtins for things like __STDC__ + // and __DATE__ etc. +#if 0 + /* __STDC__ has the value 1 under normal circumstances. + However, if (a) we are in a system header, (b) the option + stdc_0_in_system_headers is true (set by target config), and + (c) we are not in strictly conforming mode, then it has the + value 0. (b) and (c) are already checked in cpp_init_builtins. */ + //case BT_STDC: + if (cpp_in_system_header (pfile)) + number = 0; + else + number = 1; + break; +#endif + // These should all be defined in the preprocessor according to the + // current language configuration. + DefineBuiltinMacro(Buf, "__STDC__=1"); + //DefineBuiltinMacro(Buf, "__ASSEMBLER__=1"); + if (PP.getLangOptions().C99) + DefineBuiltinMacro(Buf, "__STDC_VERSION__=199901L"); + else + DefineBuiltinMacro(Buf, "__STDC_VERSION__=199409L"); + + DefineBuiltinMacro(Buf, "__STDC_HOSTED__=1"); + if (PP.getLangOptions().ObjC1) + DefineBuiltinMacro(Buf, "__OBJC__=1"); + if (PP.getLangOptions().ObjC2) + DefineBuiltinMacro(Buf, "__OBJC2__=1"); + + // Get the target #defines. + PP.getTargetInfo().getTargetDefines(Buf); + + // Compiler set macros. 
+ DefineBuiltinMacro(Buf, "__APPLE_CC__=5250"); + DefineBuiltinMacro(Buf, "__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__=1030"); + DefineBuiltinMacro(Buf, "__GNUC_MINOR__=0"); + DefineBuiltinMacro(Buf, "__GNUC_PATCHLEVEL__=1"); + DefineBuiltinMacro(Buf, "__GNUC__=4"); + DefineBuiltinMacro(Buf, "__GXX_ABI_VERSION=1002"); + DefineBuiltinMacro(Buf, "__VERSION__=\"4.0.1 (Apple Computer, Inc. " + "build 5250)\""); + + // Build configuration options. + DefineBuiltinMacro(Buf, "__DYNAMIC__=1"); + DefineBuiltinMacro(Buf, "__FINITE_MATH_ONLY__=0"); + DefineBuiltinMacro(Buf, "__NO_INLINE__=1"); + DefineBuiltinMacro(Buf, "__PIC__=1"); + + + if (PP.getLangOptions().CPlusPlus) { + DefineBuiltinMacro(Buf, "__DEPRECATED=1"); + DefineBuiltinMacro(Buf, "__EXCEPTIONS=1"); + DefineBuiltinMacro(Buf, "__GNUG__=4"); + DefineBuiltinMacro(Buf, "__GXX_WEAK__=1"); + DefineBuiltinMacro(Buf, "__cplusplus=1"); + DefineBuiltinMacro(Buf, "__private_extern__=extern"); + } + + // FIXME: Should emit a #line directive here. + + // Add macros from the command line. + // FIXME: Should traverse the #define/#undef lists in parallel. + for (unsigned i = 0, e = D_macros.size(); i != e; ++i) + DefineBuiltinMacro(Buf, D_macros[i].c_str()); + for (unsigned i = 0, e = U_macros.size(); i != e; ++i) + DefineBuiltinMacro(Buf, U_macros[i].c_str(), "#undef "); +} + +//===----------------------------------------------------------------------===// +// Preprocessor include path information. +//===----------------------------------------------------------------------===// + +// This tool exports a large number of command line options to control how the +// preprocessor searches for header files. 
At root, however, the Preprocessor +// object takes a very simple interface: a list of directories to search for +// +// FIXME: -nostdinc,-nostdinc++ +// FIXME: -isysroot,-imultilib +// +// FIXME: -include,-imacros + +static llvm::cl::opt<bool> +nostdinc("nostdinc", llvm::cl::desc("Disable standard #include directories")); + +// Various command line options. These four add directories to each chain. +static llvm::cl::list<std::string> +F_dirs("F", llvm::cl::value_desc("directory"), llvm::cl::Prefix, + llvm::cl::desc("Add directory to framework include search path")); +static llvm::cl::list<std::string> +I_dirs("I", llvm::cl::value_desc("directory"), llvm::cl::Prefix, + llvm::cl::desc("Add directory to include search path")); +static llvm::cl::list<std::string> +idirafter_dirs("idirafter", llvm::cl::value_desc("directory"), llvm::cl::Prefix, + llvm::cl::desc("Add directory to AFTER include search path")); +static llvm::cl::list<std::string> +iquote_dirs("iquote", llvm::cl::value_desc("directory"), llvm::cl::Prefix, + llvm::cl::desc("Add directory to QUOTE include search path")); +static llvm::cl::list<std::string> +isystem_dirs("isystem", llvm::cl::value_desc("directory"), llvm::cl::Prefix, + llvm::cl::desc("Add directory to SYSTEM include search path")); + +// These handle -iprefix/-iwithprefix/-iwithprefixbefore. +static llvm::cl::list<std::string> +iprefix_vals("iprefix", llvm::cl::value_desc("prefix"), llvm::cl::Prefix, + llvm::cl::desc("Set the -iwithprefix/-iwithprefixbefore prefix")); +static llvm::cl::list<std::string> +iwithprefix_vals("iwithprefix", llvm::cl::value_desc("dir"), llvm::cl::Prefix, + llvm::cl::desc("Set directory to SYSTEM include search path with prefix")); +static llvm::cl::list<std::string> +iwithprefixbefore_vals("iwithprefixbefore", llvm::cl::value_desc("dir"), + llvm::cl::Prefix, + llvm::cl::desc("Set directory to include search path with prefix")); + +// Finally, implement the code that groks the options above. 
+enum IncludeDirGroup { + Quoted = 0, + Angled, + System, + After +}; + +static std::vector<DirectoryLookup> IncludeGroup[4]; + +/// AddPath - Add the specified path to the specified group list. +/// +static void AddPath(const std::string &Path, IncludeDirGroup Group, + bool isCXXAware, bool isUserSupplied, + bool isFramework, FileManager &FM) { + const DirectoryEntry *DE = FM.getDirectory(Path); + if (DE == 0) { + if (Verbose) + fprintf(stderr, "ignoring nonexistent directory \"%s\"\n", + Path.c_str()); + return; + } + + DirectoryLookup::DirType Type; + if (Group == Quoted || Group == Angled) + Type = DirectoryLookup::NormalHeaderDir; + else if (isCXXAware) + Type = DirectoryLookup::SystemHeaderDir; + else + Type = DirectoryLookup::ExternCSystemHeaderDir; + + IncludeGroup[Group].push_back(DirectoryLookup(DE, Type, isUserSupplied, + isFramework)); +} + +/// RemoveDuplicates - If there are duplicate directory entries in the specified +/// search list, remove the later (dead) ones. +static void RemoveDuplicates(std::vector<DirectoryLookup> &SearchList) { + std::set<const DirectoryEntry *> SeenDirs; + for (unsigned i = 0; i != SearchList.size(); ++i) { + // If this isn't the first time we've seen this dir, remove it. + if (!SeenDirs.insert(SearchList[i].getDir()).second) { + if (Verbose) + fprintf(stderr, "ignoring duplicate directory \"%s\"\n", + SearchList[i].getDir()->getName()); + SearchList.erase(SearchList.begin()+i); + --i; + } + } +} + +/// InitializeIncludePaths - Process the -I options and set them in the +/// HeaderSearch object. +static void InitializeIncludePaths(HeaderSearch &Headers, FileManager &FM, + Diagnostic &Diags, const LangOptions &Lang) { + // Handle -F... options. + for (unsigned i = 0, e = F_dirs.size(); i != e; ++i) + AddPath(F_dirs[i], Angled, false, true, true, FM); + + // Handle -I... options. + for (unsigned i = 0, e = I_dirs.size(); i != e; ++i) { + if (I_dirs[i] == "-") { + // -I- is a deprecated GCC feature. 
+ Diags.Report(SourceLocation(), diag::err_pp_I_dash_not_supported); + } else { + AddPath(I_dirs[i], Angled, false, true, false, FM); + } + } + + // Handle -idirafter... options. + for (unsigned i = 0, e = idirafter_dirs.size(); i != e; ++i) + AddPath(idirafter_dirs[i], After, false, true, false, FM); + + // Handle -iquote... options. + for (unsigned i = 0, e = iquote_dirs.size(); i != e; ++i) + AddPath(iquote_dirs[i], Quoted, false, true, false, FM); + + // Handle -isystem... options. + for (unsigned i = 0, e = isystem_dirs.size(); i != e; ++i) + AddPath(isystem_dirs[i], System, false, true, false, FM); + + // Walk the -iprefix/-iwithprefix/-iwithprefixbefore argument lists in + // parallel, processing the values in order of occurance to get the right + // prefixes. + { + std::string Prefix = ""; // FIXME: this isn't the correct default prefix. + unsigned iprefix_idx = 0; + unsigned iwithprefix_idx = 0; + unsigned iwithprefixbefore_idx = 0; + bool iprefix_done = iprefix_vals.empty(); + bool iwithprefix_done = iwithprefix_vals.empty(); + bool iwithprefixbefore_done = iwithprefixbefore_vals.empty(); + while (!iprefix_done || !iwithprefix_done || !iwithprefixbefore_done) { + if (!iprefix_done && + (iwithprefix_done || + iprefix_vals.getPosition(iprefix_idx) < + iwithprefix_vals.getPosition(iwithprefix_idx)) && + (iwithprefixbefore_done || + iprefix_vals.getPosition(iprefix_idx) < + iwithprefixbefore_vals.getPosition(iwithprefixbefore_idx))) { + Prefix = iprefix_vals[iprefix_idx]; + ++iprefix_idx; + iprefix_done = iprefix_idx == iprefix_vals.size(); + } else if (!iwithprefix_done && + (iwithprefixbefore_done || + iwithprefix_vals.getPosition(iwithprefix_idx) < + iwithprefixbefore_vals.getPosition(iwithprefixbefore_idx))) { + AddPath(Prefix+iwithprefix_vals[iwithprefix_idx], + System, false, false, false, FM); + ++iwithprefix_idx; + iwithprefix_done = iwithprefix_idx == iwithprefix_vals.size(); + } else { + AddPath(Prefix+iwithprefixbefore_vals[iwithprefixbefore_idx], 
+ Angled, false, false, false, FM); + ++iwithprefixbefore_idx; + iwithprefixbefore_done = + iwithprefixbefore_idx == iwithprefixbefore_vals.size(); + } + } + } + + // FIXME: Add contents of the CPATH, C_INCLUDE_PATH, CPLUS_INCLUDE_PATH, + // OBJC_INCLUDE_PATH, OBJCPLUS_INCLUDE_PATH environment variables. + + // FIXME: temporary hack: hard-coded paths. + // FIXME: get these from the target? + if (!nostdinc) { + if (Lang.CPlusPlus) { + AddPath("/usr/include/c++/4.0.0", System, true, false, false, FM); + AddPath("/usr/include/c++/4.0.0/i686-apple-darwin8", System, true, false, + false, FM); + AddPath("/usr/include/c++/4.0.0/backward", System, true, false, false,FM); + } + + AddPath("/usr/local/include", System, false, false, false, FM); + // leopard + AddPath("/usr/lib/gcc/i686-apple-darwin9/4.0.1/include", System, + false, false, false, FM); + AddPath("/usr/lib/gcc/powerpc-apple-darwin9/4.0.1/include", + System, false, false, false, FM); + AddPath("/usr/lib/gcc/powerpc-apple-darwin9/" + "4.0.1/../../../../powerpc-apple-darwin0/include", + System, false, false, false, FM); + + // tiger + AddPath("/usr/lib/gcc/i686-apple-darwin8/4.0.1/include", System, + false, false, false, FM); + AddPath("/usr/lib/gcc/powerpc-apple-darwin8/4.0.1/include", + System, false, false, false, FM); + AddPath("/usr/lib/gcc/powerpc-apple-darwin8/" + "4.0.1/../../../../powerpc-apple-darwin8/include", + System, false, false, false, FM); + + AddPath("/usr/include", System, false, false, false, FM); + AddPath("/System/Library/Frameworks", System, true, false, true, FM); + AddPath("/Library/Frameworks", System, true, false, true, FM); + } + + // Now that we have collected all of the include paths, merge them all + // together and tell the preprocessor about them. + + // Concatenate ANGLE+SYSTEM+AFTER chains together into SearchList. 
+ std::vector<DirectoryLookup> SearchList; + SearchList = IncludeGroup[Angled]; + SearchList.insert(SearchList.end(), IncludeGroup[System].begin(), + IncludeGroup[System].end()); + SearchList.insert(SearchList.end(), IncludeGroup[After].begin(), + IncludeGroup[After].end()); + RemoveDuplicates(SearchList); + RemoveDuplicates(IncludeGroup[Quoted]); + + // Prepend QUOTED list on the search list. + SearchList.insert(SearchList.begin(), IncludeGroup[Quoted].begin(), + IncludeGroup[Quoted].end()); + + + bool DontSearchCurDir = false; // TODO: set to true if -I- is set? + Headers.SetSearchPaths(SearchList, IncludeGroup[Quoted].size(), + DontSearchCurDir); + + // If verbose, print the list of directories that will be searched. + if (Verbose) { + fprintf(stderr, "#include \"...\" search starts here:\n"); + unsigned QuotedIdx = IncludeGroup[Quoted].size(); + for (unsigned i = 0, e = SearchList.size(); i != e; ++i) { + if (i == QuotedIdx) + fprintf(stderr, "#include <...> search starts here:\n"); + fprintf(stderr, " %s\n", SearchList[i].getDir()->getName()); + } + } +} + + +// Read any files specified by -imacros or -include. +static void ReadPrologFiles(Preprocessor &PP, std::vector<char> &Buf) { + // FIXME: IMPLEMENT +} + +//===----------------------------------------------------------------------===// +// Basic Parser driver +//===----------------------------------------------------------------------===// + +static void ParseFile(Preprocessor &PP, MinimalAction *PA, unsigned MainFileID){ + Parser P(PP, *PA); + PP.EnterSourceFile(MainFileID, 0, true); + + // Parsing the specified input file. + P.ParseTranslationUnit(); + delete PA; +} + +//===----------------------------------------------------------------------===// +// Main driver +//===----------------------------------------------------------------------===// + +/// InitializePreprocessor - Initialize the preprocessor getting it and the +/// environment ready to process a single file. 
/// This returns the file ID for the
/// input file. If a failure happens, it returns 0.
///
/// The predefined macros are appended to PrologMacros, which is then
/// NUL-terminated, wrapped in a "<predefines>" memory buffer and lexed by PP
/// to populate its initial state. PrologMacros backs that buffer, so it has
/// to stay alive for as long as PP may refer to it.
static unsigned InitializePreprocessor(Preprocessor &PP,
                                       const std::string &InFile,
                                       SourceManager &SourceMgr,
                                       HeaderSearch &HeaderInfo,
                                       const LangOptions &LangInfo,
                                       std::vector<char> &PrologMacros) {
  FileManager &FileMgr = HeaderInfo.getFileMgr();

  // Install things like __POWERPC__, __GNUC__, etc into the macro table.
  InitializePredefinedMacros(PP, PrologMacros);

  // Read any files specified by -imacros or -include.
  ReadPrologFiles(PP, PrologMacros);

  // Figure out where to get and map in the main file. "-" means stdin.
  unsigned MainFileID = 0;
  if (InFile != "-") {
    const FileEntry *File = FileMgr.getFile(InFile);
    if (File) MainFileID = SourceMgr.createFileID(File, SourceLocation());
    if (MainFileID == 0) {
      fprintf(stderr, "Error reading '%s'!\n",InFile.c_str());
      return 0;
    }
  } else {
    llvm::MemoryBuffer *SB = llvm::MemoryBuffer::getSTDIN();
    if (SB) MainFileID = SourceMgr.createFileIDForMemBuffer(SB);
    if (MainFileID == 0) {
      fprintf(stderr, "Error reading standard input! Empty?\n");
      return 0;
    }
  }

  // Now that we have emitted the predefined macros, #includes, etc into
  // PrologMacros, preprocess it to populate the initial preprocessor state.

  // Memory buffer must end with a null byte!
  PrologMacros.push_back(0);

  // The buffer's end pointer addresses the NUL that was just appended.
  llvm::MemoryBuffer *SB =
    llvm::MemoryBuffer::getMemBuffer(&PrologMacros.front(),&PrologMacros.back(),
                                     "<predefines>");
  assert(SB && "Cannot fail to create predefined source buffer");
  unsigned FileID = SourceMgr.createFileIDForMemBuffer(SB);
  assert(FileID && "Could not create FileID for predefines?");

  // Start parsing the predefines.
  PP.EnterSourceFile(FileID, 0);

  // Lex the file, which will read all the macros.
  LexerToken Tok;
  PP.Lex(Tok);
  assert(Tok.getKind() == tok::eof && "Didn't read entire file!");

  // Once we've read this, we're done.
  return MainFileID;
}

/// ProcessInputFile - Process a single input file with the specified state.
///
/// Dispatches on ProgAction; an unknown action is reported on stderr and
/// skipped. When Stats is set, preprocessor, identifier table and header
/// search statistics for InFile are printed afterwards.
static void ProcessInputFile(Preprocessor &PP, unsigned MainFileID,
                             const std::string &InFile,
                             SourceManager &SourceMgr,
                             TextDiagnostics &OurDiagnosticClient,
                             HeaderSearch &HeaderInfo,
                             const LangOptions &LangInfo) {
  switch (ProgAction) {
  default:
    fprintf(stderr, "Unexpected program action!\n");
    return;
  case DumpTokens: {                 // Token dump mode.
    LexerToken Tok;
    // Start parsing the specified input file.
    PP.EnterSourceFile(MainFileID, 0, true);
    do {
      PP.Lex(Tok);
      PP.DumpToken(Tok, true);
      fprintf(stderr, "\n");
    } while (Tok.getKind() != tok::eof);
    break;
  }
  case RunPreprocessorOnly: {        // Just lex as fast as we can, no output.
    LexerToken Tok;
    // Start parsing the specified input file.
    PP.EnterSourceFile(MainFileID, 0, true);
    do {
      PP.Lex(Tok);
    } while (Tok.getKind() != tok::eof);
    break;
  }

  case PrintPreprocessedInput:       // -E mode.
    DoPrintPreprocessedInput(MainFileID, PP, LangInfo);
    break;

  case ParseNoop:                    // -parse-noop
    ParseFile(PP, new MinimalAction(), MainFileID);
    break;

  case ParsePrintCallbacks:
    ParseFile(PP, CreatePrintParserActionsAction(), MainFileID);
    break;
  case ParseSyntaxOnly:              // -fsyntax-only
  case ParseAST:
    BuildASTs(PP, MainFileID, Stats);
    break;
  case ParseASTPrint:
    PrintASTs(PP, MainFileID, Stats);
    break;
  case EmitLLVM:
    EmitLLVMFromASTs(PP, MainFileID, Stats);
    break;
  case ParseASTCheck:
    // The verifier's result becomes the process exit status directly.
    exit(CheckDiagnostics(PP, MainFileID));
    break;
  }

  if (Stats) {
    fprintf(stderr, "\nSTATISTICS FOR '%s':\n", InFile.c_str());
    PP.PrintStats();
    PP.getIdentifierTable().PrintStats();
    HeaderInfo.PrintStats();
    fprintf(stderr, "\n");
  }
}

static llvm::cl::list<std::string>
InputFilenames(llvm::cl::Positional, llvm::cl::desc("<input files>"));


/// main - Driver entry point: parse the command line, set up the shared
/// source/file/diagnostic/target/header-search state, then process each input
/// file with a fresh Preprocessor.
///
/// Returns 0 on success and 1 if any error diagnostic was emitted or any
/// input could not be read. The raw error count is deliberately not used as
/// the exit status because only its low bits would reach the parent process.
int main(int argc, char **argv) {
  llvm::cl::ParseCommandLineOptions(argc, argv, " llvm cfe\n");
  llvm::sys::PrintStackTraceOnErrorSignal();

  // If no input was specified, read from stdin.
  if (InputFilenames.empty())
    InputFilenames.push_back("-");

  /// Create a SourceManager object. This tracks and owns all the file buffers
  /// allocated to the program.
  SourceManager SourceMgr;

  // Create a file manager object to provide access to and cache the filesystem.
  FileManager FileMgr;

  // Initialize language options, inferring file types from input filenames.
  // FIXME: This infers info from the first file, we should clump by language
  // to handle 'x.c y.c a.cpp b.cpp'.
  LangOptions LangInfo;
  InitializeBaseLanguage(LangInfo, InputFilenames[0]);
  InitializeLanguageStandard(LangInfo);

  std::auto_ptr<TextDiagnostics> DiagClient;
  if (ProgAction != ParseASTCheck) {
    // Print diagnostics to stderr by default.
    DiagClient.reset(new TextDiagnosticPrinter(SourceMgr));
  } else {
    // When checking diagnostics, just buffer them up.
    DiagClient.reset(new TextDiagnosticBuffer(SourceMgr));

    if (InputFilenames.size() != 1) {
      fprintf(stderr,
              "parse-ast-check only works on single input files for now.\n");
      return 1;
    }
  }

  // Configure our handling of diagnostics.
  Diagnostic Diags(*DiagClient);
  InitializeDiagnostics(Diags);

  // Get information about the targets being compiled for. Note that this
  // pointer and the TargetInfoImpl objects are never deleted by this toy
  // driver.
  TargetInfo *Target = CreateTargetInfo(Diags);
  if (Target == 0) {
    fprintf(stderr,
            "Sorry, don't know what target this is, please use -arch.\n");
    exit(1);
  }

  // Process the -I options and set them in the HeaderInfo.
  HeaderSearch HeaderInfo(FileMgr);
  DiagClient->setHeaderSearch(HeaderInfo);
  InitializeIncludePaths(HeaderInfo, FileMgr, Diags, LangInfo);

  // Set when an input cannot be opened; such failures are only printed to
  // stderr and never reach Diags, so they are tracked separately.
  bool HadInputError = false;

  for (unsigned i = 0, e = InputFilenames.size(); i != e; ++i) {
    // Set up the preprocessor with these options.
    Preprocessor PP(Diags, LangInfo, *Target, SourceMgr, HeaderInfo);
    DiagClient->setPreprocessor(PP);
    const std::string &InFile = InputFilenames[i];
    std::vector<char> PrologMacros;
    unsigned MainFileID = InitializePreprocessor(PP, InFile, SourceMgr,
                                                 HeaderInfo, LangInfo,
                                                 PrologMacros);

    if (!MainFileID) {
      HadInputError = true;
      continue;
    }

    ProcessInputFile(PP, MainFileID, InFile, SourceMgr,
                     *DiagClient, HeaderInfo, LangInfo);
    HeaderInfo.ClearFileInfo();
  }

  unsigned NumDiagnostics = Diags.getNumDiagnostics();

  if (NumDiagnostics)
    fprintf(stderr, "%u diagnostic%s generated.\n", NumDiagnostics,
            (NumDiagnostics == 1 ? "" : "s"));

  if (Stats) {
    // Printed from high-to-low level.
    SourceMgr.PrintStats();
    FileMgr.PrintStats();
    fprintf(stderr, "\n");
  }

  return (HadInputError || Diags.getNumErrors() != 0) ? 1 : 0;
}
diff --git a/Driver/clang.h b/Driver/clang.h new file mode 100644 index 0000000000..717d8886fe --- /dev/null +++ b/Driver/clang.h @@ -0,0 +1,45 @@ +//===--- clang.h - C-Language Front-end -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is the header file that pulls together the top-level driver. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_CLANG_H +#define LLVM_CLANG_CLANG_H + +namespace clang { +class Preprocessor; +class LangOptions; +class MinimalAction; +class TargetInfo; +class Diagnostic; + +/// DoPrintPreprocessedInput - Implement -E mode. +void DoPrintPreprocessedInput(unsigned MainFileID, Preprocessor &PP, + const LangOptions &Options); + +/// CreatePrintParserActionsAction - Return the actions implementation that +/// implements the -parse-print-callbacks option. +MinimalAction *CreatePrintParserActionsAction(); + +/// CreateTargetInfo - Return the set of target info objects as specified by +/// the -arch command line option. +TargetInfo *CreateTargetInfo(Diagnostic &Diags); + +/// EmitLLVMFromASTs - Implement -emit-llvm, which generates llvm IR from C. +void EmitLLVMFromASTs(Preprocessor &PP, unsigned MainFileID, + bool PrintStats); + +/// CheckDiagnostics - Implement the -parse-ast-check diagnostic verifier. 
+bool CheckDiagnostics(Preprocessor &PP, unsigned MainFileID); + +} // end namespace clang + +#endif diff --git a/INPUTS/Cocoa_h.m b/INPUTS/Cocoa_h.m new file mode 100644 index 0000000000..e6ba59924d --- /dev/null +++ b/INPUTS/Cocoa_h.m @@ -0,0 +1,2 @@ + +#import <Cocoa/Cocoa.h> diff --git a/INPUTS/carbon_h.c b/INPUTS/carbon_h.c new file mode 100644 index 0000000000..599f123a36 --- /dev/null +++ b/INPUTS/carbon_h.c @@ -0,0 +1,4 @@ + +#include <Carbon/Carbon.h> + +//#import<vecLib/vecLib.h> diff --git a/INPUTS/iostream.cc b/INPUTS/iostream.cc new file mode 100644 index 0000000000..eb12fc9aaf --- /dev/null +++ b/INPUTS/iostream.cc @@ -0,0 +1,5 @@ +// clang -I/usr/include/c++/4.0.0 -I/usr/include/c++/4.0.0/powerpc-apple-darwin8 -I/usr/include/c++/4.0.0/backward INPUTS/iostream.cc -Eonly + +#include <iostream> + +#include <stdint.h> diff --git a/INPUTS/macro_pounder_fn.c b/INPUTS/macro_pounder_fn.c new file mode 100644 index 0000000000..73f40a1d6d --- /dev/null +++ b/INPUTS/macro_pounder_fn.c @@ -0,0 +1,17 @@ + +// This pounds on macro expansion for performance reasons. This is currently +// heavily constrained by darwin's malloc. + +// Function-like macros. +#define A0(A, B) A B +#define A1(A, B) A0(A,B) A0(A,B) A0(A,B) A0(A,B) A0(A,B) A0(A,B) +#define A2(A, B) A1(A,B) A1(A,B) A1(A,B) A1(A,B) A1(A,B) A1(A,B) +#define A3(A, B) A2(A,B) A2(A,B) A2(A,B) A2(A,B) A2(A,B) A2(A,B) +#define A4(A, B) A3(A,B) A3(A,B) A3(A,B) A3(A,B) A3(A,B) A3(A,B) +#define A5(A, B) A4(A,B) A4(A,B) A4(A,B) A4(A,B) A4(A,B) A4(A,B) +#define A6(A, B) A5(A,B) A5(A,B) A5(A,B) A5(A,B) A5(A,B) A5(A,B) +#define A7(A, B) A6(A,B) A6(A,B) A6(A,B) A6(A,B) A6(A,B) A6(A,B) +#define A8(A, B) A7(A,B) A7(A,B) A7(A,B) A7(A,B) A7(A,B) A7(A,B) + +A8(a, b) + diff --git a/INPUTS/macro_pounder_obj.c b/INPUTS/macro_pounder_obj.c new file mode 100644 index 0000000000..d2465f34ed --- /dev/null +++ b/INPUTS/macro_pounder_obj.c @@ -0,0 +1,16 @@ + +// This pounds on macro expansion for performance reasons. 
This is currently +// heavily constrained by darwin's malloc. + +// Object-like expansions +#define A0 a b +#define A1 A0 A0 A0 A0 A0 A0 +#define A2 A1 A1 A1 A1 A1 A1 +#define A3 A2 A2 A2 A2 A2 A2 +#define A4 A3 A3 A3 A3 A3 A3 +#define A5 A4 A4 A4 A4 A4 A4 +#define A6 A5 A5 A5 A5 A5 A5 +#define A7 A6 A6 A6 A6 A6 A6 +#define A8 A7 A7 A7 A7 A7 A7 + +A8 diff --git a/Lex/HeaderSearch.cpp b/Lex/HeaderSearch.cpp new file mode 100644 index 0000000000..520205e1da --- /dev/null +++ b/Lex/HeaderSearch.cpp @@ -0,0 +1,319 @@ +//===--- HeaderSearch.cpp - Resolve Header File Locations ---===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the DirectoryLookup and HeaderSearch interfaces. +// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/FileManager.h" +#include "clang/Lex/HeaderSearch.h" +#include "clang/Lex/IdentifierTable.h" +#include "llvm/System/Path.h" +#include "llvm/ADT/SmallString.h" +using namespace clang; + +HeaderSearch::HeaderSearch(FileManager &FM) : FileMgr(FM), FrameworkMap(64) { + SystemDirIdx = 0; + NoCurDirSearch = false; + + NumIncluded = 0; + NumMultiIncludeFileOptzn = 0; + NumFrameworkLookups = NumSubFrameworkLookups = 0; +} + +void HeaderSearch::PrintStats() { + fprintf(stderr, "\n*** HeaderSearch Stats:\n"); + fprintf(stderr, "%d files tracked.\n", (int)FileInfo.size()); + unsigned NumOnceOnlyFiles = 0, MaxNumIncludes = 0, NumSingleIncludedFiles = 0; + for (unsigned i = 0, e = FileInfo.size(); i != e; ++i) { + NumOnceOnlyFiles += FileInfo[i].isImport; + if (MaxNumIncludes < FileInfo[i].NumIncludes) + MaxNumIncludes = FileInfo[i].NumIncludes; + NumSingleIncludedFiles += FileInfo[i].NumIncludes == 1; + } + fprintf(stderr, " %d 
#import/#pragma once files.\n", NumOnceOnlyFiles); + fprintf(stderr, " %d included exactly once.\n", NumSingleIncludedFiles); + fprintf(stderr, " %d max times a file is included.\n", MaxNumIncludes); + + fprintf(stderr, " %d #include/#include_next/#import.\n", NumIncluded); + fprintf(stderr, " %d #includes skipped due to" + " the multi-include optimization.\n", NumMultiIncludeFileOptzn); + + fprintf(stderr, "%d framework lookups.\n", NumFrameworkLookups); + fprintf(stderr, "%d subframework lookups.\n", NumSubFrameworkLookups); +} + +//===----------------------------------------------------------------------===// +// Header File Location. +//===----------------------------------------------------------------------===// + +const FileEntry *HeaderSearch::DoFrameworkLookup(const DirectoryEntry *Dir, + const char *FilenameStart, + const char *FilenameEnd) { + // Framework names must have a '/' in the filename. + const char *SlashPos = std::find(FilenameStart, FilenameEnd, '/'); + if (SlashPos == FilenameEnd) return 0; + + llvm::StringMapEntry<const DirectoryEntry *> &CacheLookup = + FrameworkMap.GetOrCreateValue(FilenameStart, SlashPos); + + // If it is some other directory, fail. + if (CacheLookup.getValue() && CacheLookup.getValue() != Dir) + return 0; + + // FrameworkName = "/System/Library/Frameworks/" + llvm::SmallString<1024> FrameworkName; + FrameworkName += Dir->getName(); + if (FrameworkName.empty() || FrameworkName.back() != '/') + FrameworkName.push_back('/'); + + // FrameworkName = "/System/Library/Frameworks/Cocoa" + FrameworkName.append(FilenameStart, SlashPos); + + // FrameworkName = "/System/Library/Frameworks/Cocoa.framework/" + FrameworkName += ".framework/"; + + if (CacheLookup.getValue() == 0) { + ++NumFrameworkLookups; + + // If the framework dir doesn't exist, we fail. 
+ if (!llvm::sys::Path(std::string(FrameworkName.begin(), + FrameworkName.end())).exists()) + return 0; + + // Otherwise, if it does, remember that this is the right direntry for this + // framework. + CacheLookup.setValue(Dir); + } + + // Check "/System/Library/Frameworks/Cocoa.framework/Headers/file.h" + unsigned OrigSize = FrameworkName.size(); + + FrameworkName += "Headers/"; + FrameworkName.append(SlashPos+1, FilenameEnd); + if (const FileEntry *FE = FileMgr.getFile(FrameworkName.begin(), + FrameworkName.end())) { + return FE; + } + + // Check "/System/Library/Frameworks/Cocoa.framework/PrivateHeaders/file.h" + const char *Private = "Private"; + FrameworkName.insert(FrameworkName.begin()+OrigSize, Private, + Private+strlen(Private)); + return FileMgr.getFile(FrameworkName.begin(), FrameworkName.end()); +} + +/// LookupFile - Given a "foo" or <foo> reference, look up the indicated file, +/// return null on failure. isAngled indicates whether the file reference is +/// for system #include's or not (i.e. using <> instead of ""). CurFileEnt, if +/// non-null, indicates where the #including file is, in case a relative search +/// is needed. +const FileEntry *HeaderSearch::LookupFile(const char *FilenameStart, + const char *FilenameEnd, + bool isAngled, + const DirectoryLookup *FromDir, + const DirectoryLookup *&CurDir, + const FileEntry *CurFileEnt) { + // If 'Filename' is absolute, check to see if it exists and no searching. + // FIXME: Portability. This should be a sys::Path interface, this doesn't + // handle things like C:\foo.txt right, nor win32 \\network\device\blah. + if (FilenameStart[0] == '/') { + CurDir = 0; + + // If this was an #include_next "/absolute/file", fail. + if (FromDir) return 0; + + // Otherwise, just return the file. + return FileMgr.getFile(FilenameStart, FilenameEnd); + } + + llvm::SmallString<1024> TmpDir; + + // Step #0, unless disabled, check to see if the file is in the #includer's + // directory. 
This search is not done for <> headers. + if (CurFileEnt && !isAngled && !NoCurDirSearch) { + // Concatenate the requested file onto the directory. + // FIXME: Portability. Filename concatenation should be in sys::Path. + TmpDir += CurFileEnt->getDir()->getName(); + TmpDir.push_back('/'); + TmpDir.append(FilenameStart, FilenameEnd); + if (const FileEntry *FE = FileMgr.getFile(TmpDir.begin(), TmpDir.end())) { + // Leave CurDir unset. + + // This file is a system header or C++ unfriendly if the old file is. + getFileInfo(FE).DirInfo = getFileInfo(CurFileEnt).DirInfo; + return FE; + } + TmpDir.clear(); + } + + CurDir = 0; + + // If this is a system #include, ignore the user #include locs. + unsigned i = isAngled ? SystemDirIdx : 0; + + // If this is a #include_next request, start searching after the directory the + // file was found in. + if (FromDir) + i = FromDir-&SearchDirs[0]; + + // Check each directory in sequence to see if it contains this file. + for (; i != SearchDirs.size(); ++i) { + const FileEntry *FE = 0; + if (!SearchDirs[i].isFramework()) { + // FIXME: Portability. Adding file to dir should be in sys::Path. + // Concatenate the requested file onto the directory. + TmpDir.clear(); + TmpDir += SearchDirs[i].getDir()->getName(); + TmpDir.push_back('/'); + TmpDir.append(FilenameStart, FilenameEnd); + FE = FileMgr.getFile(TmpDir.begin(), TmpDir.end()); + } else { + FE = DoFrameworkLookup(SearchDirs[i].getDir(), FilenameStart,FilenameEnd); + } + + if (FE) { + CurDir = &SearchDirs[i]; + + // This file is a system header or C++ unfriendly if the dir is. + getFileInfo(FE).DirInfo = CurDir->getDirCharacteristic(); + return FE; + } + } + + // Otherwise, didn't find it. + return 0; +} + +/// LookupSubframeworkHeader - Look up a subframework for the specified +/// #include file. For example, if #include'ing <HIToolbox/HIToolbox.h> from +/// within ".../Carbon.framework/Headers/Carbon.h", check to see if HIToolbox +/// is a subframework within Carbon.framework. 
If so, return the FileEntry +/// for the designated file, otherwise return null. +const FileEntry *HeaderSearch:: +LookupSubframeworkHeader(const char *FilenameStart, + const char *FilenameEnd, + const FileEntry *ContextFileEnt) { + // Framework names must have a '/' in the filename. Find it. + const char *SlashPos = std::find(FilenameStart, FilenameEnd, '/'); + if (SlashPos == FilenameEnd) return 0; + + // Look up the base framework name of the ContextFileEnt. + const char *ContextName = ContextFileEnt->getName(); + + // If the context info wasn't a framework, couldn't be a subframework. + const char *FrameworkPos = strstr(ContextName, ".framework/"); + if (FrameworkPos == 0) + return 0; + + llvm::SmallString<1024> FrameworkName(ContextName, + FrameworkPos+strlen(".framework/")); + + // Append Frameworks/HIToolbox.framework/ + FrameworkName += "Frameworks/"; + FrameworkName.append(FilenameStart, SlashPos); + FrameworkName += ".framework/"; + + llvm::StringMapEntry<const DirectoryEntry *> &CacheLookup = + FrameworkMap.GetOrCreateValue(FilenameStart, SlashPos); + + // Some other location? + if (CacheLookup.getValue() && + CacheLookup.getKeyLength() == FrameworkName.size() && + memcmp(CacheLookup.getKeyData(), &FrameworkName[0], + CacheLookup.getKeyLength()) != 0) + return 0; + + // Cache subframework. + if (CacheLookup.getValue() == 0) { + ++NumSubFrameworkLookups; + + // If the framework dir doesn't exist, we fail. + const DirectoryEntry *Dir = FileMgr.getDirectory(FrameworkName.begin(), + FrameworkName.end()); + if (Dir == 0) return 0; + + // Otherwise, if it does, remember that this is the right direntry for this + // framework. 
+ CacheLookup.setValue(Dir); + } + + const FileEntry *FE = 0; + + // Check ".../Frameworks/HIToolbox.framework/Headers/HIToolbox.h" + llvm::SmallString<1024> HeadersFilename(FrameworkName); + HeadersFilename += "Headers/"; + HeadersFilename.append(SlashPos+1, FilenameEnd); + if (!(FE = FileMgr.getFile(HeadersFilename.begin(), + HeadersFilename.end()))) { + + // Check ".../Frameworks/HIToolbox.framework/PrivateHeaders/HIToolbox.h" + HeadersFilename = FrameworkName; + HeadersFilename += "PrivateHeaders/"; + HeadersFilename.append(SlashPos+1, FilenameEnd); + if (!(FE = FileMgr.getFile(HeadersFilename.begin(), HeadersFilename.end()))) + return 0; + } + + // This file is a system header or C++ unfriendly if the old file is. + getFileInfo(FE).DirInfo = getFileInfo(ContextFileEnt).DirInfo; + return FE; +} + +//===----------------------------------------------------------------------===// +// File Info Management. +//===----------------------------------------------------------------------===// + + +/// getFileInfo - Return the PerFileInfo structure for the specified +/// FileEntry. +HeaderSearch::PerFileInfo &HeaderSearch::getFileInfo(const FileEntry *FE) { + if (FE->getUID() >= FileInfo.size()) + FileInfo.resize(FE->getUID()+1); + return FileInfo[FE->getUID()]; +} + +/// ShouldEnterIncludeFile - Mark the specified file as a target of of a +/// #include, #include_next, or #import directive. Return false if #including +/// the file will have no effect or true if we should include it. +bool HeaderSearch::ShouldEnterIncludeFile(const FileEntry *File, bool isImport){ + ++NumIncluded; // Count # of attempted #includes. + + // Get information about this file. + PerFileInfo &FileInfo = getFileInfo(File); + + // If this is a #import directive, check that we have not already imported + // this header. + if (isImport) { + // If this has already been imported, don't import it again. + FileInfo.isImport = true; + + // Has this already been #import'ed or #include'd? 
+ if (FileInfo.NumIncludes) return false; + } else { + // Otherwise, if this is a #include of a file that was previously #import'd + // or if this is the second #include of a #pragma once file, ignore it. + if (FileInfo.isImport) + return false; + } + + // Next, check to see if the file is wrapped with #ifndef guards. If so, and + // if the macro that guards it is defined, we know the #include has no effect. + if (FileInfo.ControllingMacro && FileInfo.ControllingMacro->getMacroInfo()) { + ++NumMultiIncludeFileOptzn; + return false; + } + + // Increment the number of times this file has been included. + ++FileInfo.NumIncludes; + + return true; +} + + diff --git a/Lex/IdentifierTable.cpp b/Lex/IdentifierTable.cpp new file mode 100644 index 0000000000..e671af9839 --- /dev/null +++ b/Lex/IdentifierTable.cpp @@ -0,0 +1,188 @@ +//===--- IdentifierTable.cpp - Hash table for identifier lookup -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the IdentifierInfo, IdentifierVisitor, and +// IdentifierTable interfaces. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/IdentifierTable.h" +#include "clang/Lex/MacroInfo.h" +#include "clang/Basic/LangOptions.h" +using namespace clang; + +//===----------------------------------------------------------------------===// +// IdentifierInfo Implementation +//===----------------------------------------------------------------------===// + +IdentifierInfo::IdentifierInfo() { + Macro = 0; + TokenID = tok::identifier; + PPID = tok::pp_not_keyword; + ObjCID = tok::objc_not_keyword; + BuiltinID = 0; + IsExtension = false; + IsPoisoned = false; + IsOtherTargetMacro = false; + IsCPPOperatorKeyword = false; + FETokenInfo = 0; +} + +IdentifierInfo::~IdentifierInfo() { + delete Macro; +} + +//===----------------------------------------------------------------------===// +// IdentifierTable Implementation +//===----------------------------------------------------------------------===// + +IdentifierTable::IdentifierTable(const LangOptions &LangOpts) + // Start with space for 8K identifiers. + : HashTable(8192) { + + // Populate the identifier table with info about keywords for the current + // language. + AddKeywords(LangOpts); +} + +//===----------------------------------------------------------------------===// +// Language Keyword Implementation +//===----------------------------------------------------------------------===// + +/// AddKeyword - This method is used to associate a token ID with specific +/// identifiers because they are language keywords. This causes the lexer to +/// automatically map matching identifiers to specialized token codes. +/// +/// The C90/C99/CPP flags are set to 0 if the token should be enabled in the +/// specified langauge, set to 1 if it is an extension in the specified +/// language, and set to 2 if disabled in the specified language. 
+static void AddKeyword(const char *Keyword, unsigned KWLen, + tok::TokenKind TokenCode, + int C90, int C99, int CXX, + const LangOptions &LangOpts, IdentifierTable &Table) { + int Flags = LangOpts.CPlusPlus ? CXX : (LangOpts.C99 ? C99 : C90); + + // Don't add this keyword if disabled in this language or if an extension + // and extensions are disabled. + if (Flags + LangOpts.NoExtensions >= 2) return; + + IdentifierInfo &Info = Table.get(Keyword, Keyword+KWLen); + Info.setTokenID(TokenCode); + Info.setIsExtensionToken(Flags == 1); +} + +static void AddAlias(const char *Keyword, unsigned KWLen, + const char *AliaseeKeyword, unsigned AliaseeKWLen, + const LangOptions &LangOpts, IdentifierTable &Table) { + IdentifierInfo &AliasInfo = Table.get(Keyword, Keyword+KWLen); + IdentifierInfo &AliaseeInfo = Table.get(AliaseeKeyword, + AliaseeKeyword+AliaseeKWLen); + AliasInfo.setTokenID(AliaseeInfo.getTokenID()); + AliasInfo.setIsExtensionToken(AliaseeInfo.isExtensionToken()); +} + +/// AddPPKeyword - Register a preprocessor keyword like "define" "undef" or +/// "elif". +static void AddPPKeyword(tok::PPKeywordKind PPID, + const char *Name, unsigned NameLen, + IdentifierTable &Table) { + Table.get(Name, Name+NameLen).setPPKeywordID(PPID); +} + +/// AddCXXOperatorKeyword - Register a C++ operator keyword alternative +/// representations. +static void AddCXXOperatorKeyword(const char *Keyword, unsigned KWLen, + tok::TokenKind TokenCode, + IdentifierTable &Table) { + IdentifierInfo &Info = Table.get(Keyword, Keyword + KWLen); + Info.setTokenID(TokenCode); + Info.setIsCPlusplusOperatorKeyword(); +} + +/// AddObjCKeyword - Register an Objective-C @keyword like "class" "selector" or +/// "property". +static void AddObjCKeyword(tok::ObjCKeywordKind ObjCID, + const char *Name, unsigned NameLen, + IdentifierTable &Table) { + Table.get(Name, Name+NameLen).setObjCKeywordID(ObjCID); +} + +/// AddKeywords - Add all keywords to the symbol table. 
+/// +void IdentifierTable::AddKeywords(const LangOptions &LangOpts) { + enum { + C90Shift = 0, + EXTC90 = 1 << C90Shift, + NOTC90 = 2 << C90Shift, + C99Shift = 2, + EXTC99 = 1 << C99Shift, + NOTC99 = 2 << C99Shift, + CPPShift = 4, + EXTCPP = 1 << CPPShift, + NOTCPP = 2 << CPPShift, + Mask = 3 + }; + + // Add keywords and tokens for the current language. +#define KEYWORD(NAME, FLAGS) \ + AddKeyword(#NAME, strlen(#NAME), tok::kw_ ## NAME, \ + ((FLAGS) >> C90Shift) & Mask, \ + ((FLAGS) >> C99Shift) & Mask, \ + ((FLAGS) >> CPPShift) & Mask, LangOpts, *this); +#define ALIAS(NAME, TOK) \ + AddAlias(NAME, strlen(NAME), #TOK, strlen(#TOK), LangOpts, *this); +#define PPKEYWORD(NAME) \ + AddPPKeyword(tok::pp_##NAME, #NAME, strlen(#NAME), *this); +#define CXX_KEYWORD_OPERATOR(NAME, ALIAS) \ + if (LangOpts.CXXOperatorNames) \ + AddCXXOperatorKeyword(#NAME, strlen(#NAME), tok::ALIAS, *this); +#define OBJC1_AT_KEYWORD(NAME) \ + if (LangOpts.ObjC1) \ + AddObjCKeyword(tok::objc_##NAME, #NAME, strlen(#NAME), *this); +#define OBJC2_AT_KEYWORD(NAME) \ + if (LangOpts.ObjC2) \ + AddObjCKeyword(tok::objc_##NAME, #NAME, strlen(#NAME), *this); +#include "clang/Basic/TokenKinds.def" +} + + +//===----------------------------------------------------------------------===// +// Stats Implementation +//===----------------------------------------------------------------------===// + +/// PrintStats - Print statistics about how well the identifier table is doing +/// at hashing identifiers. +void IdentifierTable::PrintStats() const { + unsigned NumBuckets = HashTable.getNumBuckets(); + unsigned NumIdentifiers = HashTable.getNumItems(); + unsigned NumEmptyBuckets = NumBuckets-NumIdentifiers; + unsigned AverageIdentifierSize = 0; + unsigned MaxIdentifierLength = 0; + + // TODO: Figure out maximum times an identifier had to probe for -stats. 
+ for (llvm::StringMap<IdentifierInfo, llvm::BumpPtrAllocator>::const_iterator + I = HashTable.begin(), E = HashTable.end(); I != E; ++I) { + unsigned IdLen = I->getKeyLength(); + AverageIdentifierSize += IdLen; + if (MaxIdentifierLength < IdLen) + MaxIdentifierLength = IdLen; + } + + fprintf(stderr, "\n*** Identifier Table Stats:\n"); + fprintf(stderr, "# Identifiers: %d\n", NumIdentifiers); + fprintf(stderr, "# Empty Buckets: %d\n", NumEmptyBuckets); + fprintf(stderr, "Hash density (#identifiers per bucket): %f\n", + NumIdentifiers/(double)NumBuckets); + fprintf(stderr, "Ave identifier length: %f\n", + (AverageIdentifierSize/(double)NumIdentifiers)); + fprintf(stderr, "Max identifier length: %d\n", MaxIdentifierLength); + + // Compute statistics about the memory allocated for identifiers. + HashTable.getAllocator().PrintStats(); +} diff --git a/Lex/Lexer.cpp b/Lex/Lexer.cpp new file mode 100644 index 0000000000..1775b2f7bf --- /dev/null +++ b/Lex/Lexer.cpp @@ -0,0 +1,1491 @@ +//===--- Lexer.cpp - C Language Family Lexer ------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Lexer and LexerToken interfaces. 
+// +//===----------------------------------------------------------------------===// +// +// TODO: GCC Diagnostics emitted by the lexer: +// PEDWARN: (form feed|vertical tab) in preprocessing directive +// +// Universal characters, unicode, char mapping: +// WARNING: `%.*s' is not in NFKC +// WARNING: `%.*s' is not in NFC +// +// Other: +// TODO: Options to support: +// -fexec-charset,-fwide-exec-charset +// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/Lexer.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/SourceLocation.h" +#include "llvm/Support/MemoryBuffer.h" +#include <cctype> +using namespace clang; + +static void InitCharacterInfo(); + +Lexer::Lexer(const llvm::MemoryBuffer *File, unsigned fileid, Preprocessor &pp, + const char *BufStart, const char *BufEnd) + : BufferEnd(BufEnd ? BufEnd : File->getBufferEnd()), + InputFile(File), CurFileID(fileid), PP(pp), Features(PP.getLangOptions()) { + Is_PragmaLexer = false; + IsMainFile = false; + InitCharacterInfo(); + + assert(BufferEnd[0] == 0 && + "We assume that the input buffer has a null character at the end" + " to simplify lexing!"); + + BufferPtr = BufStart ? BufStart : File->getBufferStart(); + + // Start of the file is a start of line. + IsAtStartOfLine = true; + + // We are not after parsing a #. + ParsingPreprocessorDirective = false; + + // We are not after parsing #include. + ParsingFilename = false; + + // We are not in raw mode. Raw mode disables diagnostics and interpretation + // of tokens (e.g. identifiers, thus disabling macro expansion). It is used + // to quickly lex the tokens of the buffer, e.g. when handling a "#if 0" block + // or otherwise skipping over tokens. + LexingRawMode = false; + + // Default to keeping comments if requested. 
+ KeepCommentMode = PP.getCommentRetentionState(); +} + +/// Stringify - Convert the specified string into a C string, with surrounding +/// ""'s, and with escaped \ and " characters. +std::string Lexer::Stringify(const std::string &Str, bool Charify) { + std::string Result = Str; + char Quote = Charify ? '\'' : '"'; + for (unsigned i = 0, e = Result.size(); i != e; ++i) { + if (Result[i] == '\\' || Result[i] == Quote) { + Result.insert(Result.begin()+i, '\\'); + ++i; ++e; + } + } + return Result; +} + + +//===----------------------------------------------------------------------===// +// Character information. +//===----------------------------------------------------------------------===// + +static unsigned char CharInfo[256]; + +enum { + CHAR_HORZ_WS = 0x01, // ' ', '\t', '\f', '\v'. Note, no '\0' + CHAR_VERT_WS = 0x02, // '\r', '\n' + CHAR_LETTER = 0x04, // a-z,A-Z + CHAR_NUMBER = 0x08, // 0-9 + CHAR_UNDER = 0x10, // _ + CHAR_PERIOD = 0x20 // . +}; + +static void InitCharacterInfo() { + static bool isInited = false; + if (isInited) return; + isInited = true; + + // Intiialize the CharInfo table. + // TODO: statically initialize this. + CharInfo[(int)' '] = CharInfo[(int)'\t'] = + CharInfo[(int)'\f'] = CharInfo[(int)'\v'] = CHAR_HORZ_WS; + CharInfo[(int)'\n'] = CharInfo[(int)'\r'] = CHAR_VERT_WS; + + CharInfo[(int)'_'] = CHAR_UNDER; + CharInfo[(int)'.'] = CHAR_PERIOD; + for (unsigned i = 'a'; i <= 'z'; ++i) + CharInfo[i] = CharInfo[i+'A'-'a'] = CHAR_LETTER; + for (unsigned i = '0'; i <= '9'; ++i) + CharInfo[i] = CHAR_NUMBER; +} + +/// isIdentifierBody - Return true if this is the body character of an +/// identifier, which is [a-zA-Z0-9_]. +static inline bool isIdentifierBody(unsigned char c) { + return CharInfo[c] & (CHAR_LETTER|CHAR_NUMBER|CHAR_UNDER); +} + +/// isHorizontalWhitespace - Return true if this character is horizontal +/// whitespace: ' ', '\t', '\f', '\v'. Note that this returns false for '\0'. 
+static inline bool isHorizontalWhitespace(unsigned char c) { + return CharInfo[c] & CHAR_HORZ_WS; +} + +/// isWhitespace - Return true if this character is horizontal or vertical +/// whitespace: ' ', '\t', '\f', '\v', '\n', '\r'. Note that this returns false +/// for '\0'. +static inline bool isWhitespace(unsigned char c) { + return CharInfo[c] & (CHAR_HORZ_WS|CHAR_VERT_WS); +} + +/// isNumberBody - Return true if this is the body character of an +/// preprocessing number, which is [a-zA-Z0-9_.]. +static inline bool isNumberBody(unsigned char c) { + return CharInfo[c] & (CHAR_LETTER|CHAR_NUMBER|CHAR_UNDER|CHAR_PERIOD); +} + + +//===----------------------------------------------------------------------===// +// Diagnostics forwarding code. +//===----------------------------------------------------------------------===// + +/// getSourceLocation - Return a source location identifier for the specified +/// offset in the current file. +SourceLocation Lexer::getSourceLocation(const char *Loc) const { + assert(Loc >= InputFile->getBufferStart() && Loc <= BufferEnd && + "Location out of range for this buffer!"); + return SourceLocation(CurFileID, Loc-InputFile->getBufferStart()); +} + + +/// Diag - Forwarding function for diagnostics. This translate a source +/// position in the current buffer into a SourceLocation object for rendering. +void Lexer::Diag(const char *Loc, unsigned DiagID, + const std::string &Msg) const { + if (LexingRawMode && Diagnostic::isNoteWarningOrExtension(DiagID)) + return; + PP.Diag(getSourceLocation(Loc), DiagID, Msg); +} +void Lexer::Diag(SourceLocation Loc, unsigned DiagID, + const std::string &Msg) const { + if (LexingRawMode && Diagnostic::isNoteWarningOrExtension(DiagID)) + return; + PP.Diag(Loc, DiagID, Msg); +} + + +//===----------------------------------------------------------------------===// +// Trigraph and Escaped Newline Handling Code. 
+//===----------------------------------------------------------------------===// + +/// GetTrigraphCharForLetter - Given a character that occurs after a ?? pair, +/// return the decoded trigraph letter it corresponds to, or '\0' if nothing. +static char GetTrigraphCharForLetter(char Letter) { + switch (Letter) { + default: return 0; + case '=': return '#'; + case ')': return ']'; + case '(': return '['; + case '!': return '|'; + case '\'': return '^'; + case '>': return '}'; + case '/': return '\\'; + case '<': return '{'; + case '-': return '~'; + } +} + +/// DecodeTrigraphChar - If the specified character is a legal trigraph when +/// prefixed with ??, emit a trigraph warning. If trigraphs are enabled, +/// return the result character. Finally, emit a warning about trigraph use +/// whether trigraphs are enabled or not. +static char DecodeTrigraphChar(const char *CP, Lexer *L) { + char Res = GetTrigraphCharForLetter(*CP); + if (Res && L) { + if (!L->getFeatures().Trigraphs) { + L->Diag(CP-2, diag::trigraph_ignored); + return 0; + } else { + L->Diag(CP-2, diag::trigraph_converted, std::string()+Res); + } + } + return Res; +} + +/// getCharAndSizeSlow - Peek a single 'character' from the specified buffer, +/// get its size, and return it. This is tricky in several cases: +/// 1. If currently at the start of a trigraph, we warn about the trigraph, +/// then either return the trigraph (skipping 3 chars) or the '?', +/// depending on whether trigraphs are enabled or not. +/// 2. If this is an escaped newline (potentially with whitespace between +/// the backslash and newline), implicitly skip the newline and return +/// the char after it. +/// 3. If this is a UCN, return it. FIXME: C++ UCN's? +/// +/// This handles the slow/uncommon case of the getCharAndSize method. Here we +/// know that we can accumulate into Size, and that we have already incremented +/// Ptr by Size bytes. 
+/// +/// NOTE: When this method is updated, getCharAndSizeSlowNoWarn (below) should +/// be updated to match. +/// +char Lexer::getCharAndSizeSlow(const char *Ptr, unsigned &Size, + LexerToken *Tok) { + // If we have a slash, look for an escaped newline. + if (Ptr[0] == '\\') { + ++Size; + ++Ptr; +Slash: + // Common case, backslash-char where the char is not whitespace. + if (!isWhitespace(Ptr[0])) return '\\'; + + // See if we have optional whitespace characters followed by a newline. + { + unsigned SizeTmp = 0; + do { + ++SizeTmp; + if (Ptr[SizeTmp-1] == '\n' || Ptr[SizeTmp-1] == '\r') { + // Remember that this token needs to be cleaned. + if (Tok) Tok->setFlag(LexerToken::NeedsCleaning); + + // Warn if there was whitespace between the backslash and newline. + if (SizeTmp != 1 && Tok) + Diag(Ptr, diag::backslash_newline_space); + + // If this is a \r\n or \n\r, skip the newlines. + if ((Ptr[SizeTmp] == '\r' || Ptr[SizeTmp] == '\n') && + Ptr[SizeTmp-1] != Ptr[SizeTmp]) + ++SizeTmp; + + // Found backslash<whitespace><newline>. Parse the char after it. + Size += SizeTmp; + Ptr += SizeTmp; + // Use slow version to accumulate a correct size field. + return getCharAndSizeSlow(Ptr, Size, Tok); + } + } while (isWhitespace(Ptr[SizeTmp])); + } + + // Otherwise, this is not an escaped newline, just return the slash. + return '\\'; + } + + // If this is a trigraph, process it. + if (Ptr[0] == '?' && Ptr[1] == '?') { + // If this is actually a legal trigraph (not something like "??x"), emit + // a trigraph warning. If so, and if trigraphs are enabled, return it. + if (char C = DecodeTrigraphChar(Ptr+2, Tok ? this : 0)) { + // Remember that this token needs to be cleaned. + if (Tok) Tok->setFlag(LexerToken::NeedsCleaning); + + Ptr += 3; + Size += 3; + if (C == '\\') goto Slash; + return C; + } + } + + // If this is neither, return a single character. 
+ ++Size; + return *Ptr; +} + + +/// getCharAndSizeSlowNoWarn - Handle the slow/uncommon case of the +/// getCharAndSizeNoWarn method. Here we know that we can accumulate into Size, +/// and that we have already incremented Ptr by Size bytes. +/// +/// NOTE: When this method is updated, getCharAndSizeSlow (above) should +/// be updated to match. +char Lexer::getCharAndSizeSlowNoWarn(const char *Ptr, unsigned &Size, + const LangOptions &Features) { + // If we have a slash, look for an escaped newline. + if (Ptr[0] == '\\') { + ++Size; + ++Ptr; +Slash: + // Common case, backslash-char where the char is not whitespace. + if (!isWhitespace(Ptr[0])) return '\\'; + + // See if we have optional whitespace characters followed by a newline. + { + unsigned SizeTmp = 0; + do { + ++SizeTmp; + if (Ptr[SizeTmp-1] == '\n' || Ptr[SizeTmp-1] == '\r') { + + // If this is a \r\n or \n\r, skip the newlines. + if ((Ptr[SizeTmp] == '\r' || Ptr[SizeTmp] == '\n') && + Ptr[SizeTmp-1] != Ptr[SizeTmp]) + ++SizeTmp; + + // Found backslash<whitespace><newline>. Parse the char after it. + Size += SizeTmp; + Ptr += SizeTmp; + + // Use slow version to accumulate a correct size field. + return getCharAndSizeSlowNoWarn(Ptr, Size, Features); + } + } while (isWhitespace(Ptr[SizeTmp])); + } + + // Otherwise, this is not an escaped newline, just return the slash. + return '\\'; + } + + // If this is a trigraph, process it. + if (Features.Trigraphs && Ptr[0] == '?' && Ptr[1] == '?') { + // If this is actually a legal trigraph (not something like "??x"), return + // it. + if (char C = GetTrigraphCharForLetter(Ptr[2])) { + Ptr += 3; + Size += 3; + if (C == '\\') goto Slash; + return C; + } + } + + // If this is neither, return a single character. + ++Size; + return *Ptr; +} + +//===----------------------------------------------------------------------===// +// Helper methods for lexing. 
+//===----------------------------------------------------------------------===// + +void Lexer::LexIdentifier(LexerToken &Result, const char *CurPtr) { + // Match [_A-Za-z0-9]*, we have already matched [_A-Za-z$] + unsigned Size; + unsigned char C = *CurPtr++; + while (isIdentifierBody(C)) { + C = *CurPtr++; + } + --CurPtr; // Back up over the skipped character. + + // Fast path, no $,\,? in identifier found. '\' might be an escaped newline + // or UCN, and ? might be a trigraph for '\', an escaped newline or UCN. + // FIXME: UCNs. + if (C != '\\' && C != '?' && (C != '$' || !Features.DollarIdents)) { +FinishIdentifier: + const char *IdStart = BufferPtr; + FormTokenWithChars(Result, CurPtr); + Result.setKind(tok::identifier); + + // If we are in raw mode, return this identifier raw. There is no need to + // look up identifier information or attempt to macro expand it. + if (LexingRawMode) return; + + // Fill in Result.IdentifierInfo, looking up the identifier in the + // identifier table. + PP.LookUpIdentifierInfo(Result, IdStart); + + // Finally, now that we know we have an identifier, pass this off to the + // preprocessor, which may macro expand it or something. + return PP.HandleIdentifier(Result); + } + + // Otherwise, $,\,? in identifier found. Enter slower path. + + C = getCharAndSize(CurPtr, Size); + while (1) { + if (C == '$') { + // If we hit a $ and they are not supported in identifiers, we are done. + if (!Features.DollarIdents) goto FinishIdentifier; + + // Otherwise, emit a diagnostic and continue. + Diag(CurPtr, diag::ext_dollar_in_identifier); + CurPtr = ConsumeChar(CurPtr, Size, Result); + C = getCharAndSize(CurPtr, Size); + continue; + } else if (!isIdentifierBody(C)) { // FIXME: UCNs. + // Found end of identifier. + goto FinishIdentifier; + } + + // Otherwise, this character is good, consume it. + CurPtr = ConsumeChar(CurPtr, Size, Result); + + C = getCharAndSize(CurPtr, Size); + while (isIdentifierBody(C)) { // FIXME: UCNs. 
+ CurPtr = ConsumeChar(CurPtr, Size, Result); + C = getCharAndSize(CurPtr, Size); + } + } +} + + +/// LexNumericConstant - Lex the remainer of a integer or floating point +/// constant. From[-1] is the first character lexed. Return the end of the +/// constant. +void Lexer::LexNumericConstant(LexerToken &Result, const char *CurPtr) { + unsigned Size; + char C = getCharAndSize(CurPtr, Size); + char PrevCh = 0; + while (isNumberBody(C)) { // FIXME: UCNs? + CurPtr = ConsumeChar(CurPtr, Size, Result); + PrevCh = C; + C = getCharAndSize(CurPtr, Size); + } + + // If we fell out, check for a sign, due to 1e+12. If we have one, continue. + if ((C == '-' || C == '+') && (PrevCh == 'E' || PrevCh == 'e')) + return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result)); + + // If we have a hex FP constant, continue. + if (Features.HexFloats && + (C == '-' || C == '+') && (PrevCh == 'P' || PrevCh == 'p')) + return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result)); + + Result.setKind(tok::numeric_constant); + + // Update the location of token as well as BufferPtr. + FormTokenWithChars(Result, CurPtr); +} + +/// LexStringLiteral - Lex the remainder of a string literal, after having lexed +/// either " or L". +void Lexer::LexStringLiteral(LexerToken &Result, const char *CurPtr, bool Wide){ + const char *NulCharacter = 0; // Does this string contain the \0 character? + + char C = getAndAdvanceChar(CurPtr, Result); + while (C != '"') { + // Skip escaped characters. + if (C == '\\') { + // Skip the escaped character. + C = getAndAdvanceChar(CurPtr, Result); + } else if (C == '\n' || C == '\r' || // Newline. + (C == 0 && CurPtr-1 == BufferEnd)) { // End of file. 
+ if (!LexingRawMode) Diag(BufferPtr, diag::err_unterminated_string); + Result.setKind(tok::unknown); + FormTokenWithChars(Result, CurPtr-1); + return; + } else if (C == 0) { + NulCharacter = CurPtr-1; + } + C = getAndAdvanceChar(CurPtr, Result); + } + + // If a nul character existed in the string, warn about it. + if (NulCharacter) Diag(NulCharacter, diag::null_in_string); + + Result.setKind(Wide ? tok::wide_string_literal : tok::string_literal); + + // Update the location of the token as well as the BufferPtr instance var. + FormTokenWithChars(Result, CurPtr); +} + +/// LexAngledStringLiteral - Lex the remainder of an angled string literal, +/// after having lexed the '<' character. This is used for #include filenames. +void Lexer::LexAngledStringLiteral(LexerToken &Result, const char *CurPtr) { + const char *NulCharacter = 0; // Does this string contain the \0 character? + + char C = getAndAdvanceChar(CurPtr, Result); + while (C != '>') { + // Skip escaped characters. + if (C == '\\') { + // Skip the escaped character. + C = getAndAdvanceChar(CurPtr, Result); + } else if (C == '\n' || C == '\r' || // Newline. + (C == 0 && CurPtr-1 == BufferEnd)) { // End of file. + if (!LexingRawMode) Diag(BufferPtr, diag::err_unterminated_string); + Result.setKind(tok::unknown); + FormTokenWithChars(Result, CurPtr-1); + return; + } else if (C == 0) { + NulCharacter = CurPtr-1; + } + C = getAndAdvanceChar(CurPtr, Result); + } + + // If a nul character existed in the string, warn about it. + if (NulCharacter) Diag(NulCharacter, diag::null_in_string); + + Result.setKind(tok::angle_string_literal); + + // Update the location of token as well as BufferPtr. + FormTokenWithChars(Result, CurPtr); +} + + +/// LexCharConstant - Lex the remainder of a character constant, after having +/// lexed either ' or L'. +void Lexer::LexCharConstant(LexerToken &Result, const char *CurPtr) { + const char *NulCharacter = 0; // Does this character contain the \0 character? 
+ + // Handle the common case of 'x' and '\y' efficiently. + char C = getAndAdvanceChar(CurPtr, Result); + if (C == '\'') { + if (!LexingRawMode) Diag(BufferPtr, diag::err_empty_character); + Result.setKind(tok::unknown); + FormTokenWithChars(Result, CurPtr); + return; + } else if (C == '\\') { + // Skip the escaped character. + // FIXME: UCN's. + C = getAndAdvanceChar(CurPtr, Result); + } + + if (C && C != '\n' && C != '\r' && CurPtr[0] == '\'') { + ++CurPtr; + } else { + // Fall back on generic code for embedded nulls, newlines, wide chars. + do { + // Skip escaped characters. + if (C == '\\') { + // Skip the escaped character. + C = getAndAdvanceChar(CurPtr, Result); + } else if (C == '\n' || C == '\r' || // Newline. + (C == 0 && CurPtr-1 == BufferEnd)) { // End of file. + if (!LexingRawMode) Diag(BufferPtr, diag::err_unterminated_char); + Result.setKind(tok::unknown); + FormTokenWithChars(Result, CurPtr-1); + return; + } else if (C == 0) { + NulCharacter = CurPtr-1; + } + C = getAndAdvanceChar(CurPtr, Result); + } while (C != '\''); + } + + if (NulCharacter) Diag(NulCharacter, diag::null_in_char); + + Result.setKind(tok::char_constant); + + // Update the location of token as well as BufferPtr. + FormTokenWithChars(Result, CurPtr); +} + +/// SkipWhitespace - Efficiently skip over a series of whitespace characters. +/// Update BufferPtr to point to the next non-whitespace character and return. +void Lexer::SkipWhitespace(LexerToken &Result, const char *CurPtr) { + // Whitespace - Skip it, then return the token after the whitespace. + unsigned char Char = *CurPtr; // Skip consequtive spaces efficiently. + while (1) { + // Skip horizontal whitespace very aggressively. + while (isHorizontalWhitespace(Char)) + Char = *++CurPtr; + + // Otherwise if we something other than whitespace, we're done. + if (Char != '\n' && Char != '\r') + break; + + if (ParsingPreprocessorDirective) { + // End of preprocessor directive line, let LexTokenInternal handle this. 
+ BufferPtr = CurPtr; + return; + } + + // ok, but handle newline. + // The returned token is at the start of the line. + Result.setFlag(LexerToken::StartOfLine); + // No leading whitespace seen so far. + Result.clearFlag(LexerToken::LeadingSpace); + Char = *++CurPtr; + } + + // If this isn't immediately after a newline, there is leading space. + char PrevChar = CurPtr[-1]; + if (PrevChar != '\n' && PrevChar != '\r') + Result.setFlag(LexerToken::LeadingSpace); + + // If the next token is obviously a // or /* */ comment, skip it efficiently + // too (without going through the big switch stmt). + if (Char == '/' && CurPtr[1] == '/' && !KeepCommentMode) { + BufferPtr = CurPtr; + SkipBCPLComment(Result, CurPtr+1); + return; + } + if (Char == '/' && CurPtr[1] == '*' && !KeepCommentMode) { + BufferPtr = CurPtr; + SkipBlockComment(Result, CurPtr+2); + return; + } + BufferPtr = CurPtr; +} + +// SkipBCPLComment - We have just read the // characters from input. Skip until +// we find the newline character thats terminate the comment. Then update +/// BufferPtr and return. +bool Lexer::SkipBCPLComment(LexerToken &Result, const char *CurPtr) { + // If BCPL comments aren't explicitly enabled for this language, emit an + // extension warning. + if (!Features.BCPLComment) { + Diag(BufferPtr, diag::ext_bcpl_comment); + + // Mark them enabled so we only emit one warning for this translation + // unit. + Features.BCPLComment = true; + } + + // Scan over the body of the comment. The common case, when scanning, is that + // the comment contains normal ascii characters with nothing interesting in + // them. As such, optimize for this case with the inner loop. + char C; + do { + C = *CurPtr; + // FIXME: Speedup BCPL comment lexing. Just scan for a \n or \r character. + // If we find a \n character, scan backwards, checking to see if it's an + // escaped newline, like we do for block comments. + + // Skip over characters in the fast loop. + while (C != 0 && // Potentially EOF. 
+ C != '\\' && // Potentially escaped newline. + C != '?' && // Potentially trigraph. + C != '\n' && C != '\r') // Newline or DOS-style newline. + C = *++CurPtr; + + // If this is a newline, we're done. + if (C == '\n' || C == '\r') + break; // Found the newline? Break out! + + // Otherwise, this is a hard case. Fall back on getAndAdvanceChar to + // properly decode the character. + const char *OldPtr = CurPtr; + C = getAndAdvanceChar(CurPtr, Result); + + // If we read multiple characters, and one of those characters was a \r or + // \n, then we had an escaped newline within the comment. Emit diagnostic + // unless the next line is also a // comment. + if (CurPtr != OldPtr+1 && C != '/' && CurPtr[0] != '/') { + for (; OldPtr != CurPtr; ++OldPtr) + if (OldPtr[0] == '\n' || OldPtr[0] == '\r') { + // Okay, we found a // comment that ends in a newline, if the next + // line is also a // comment, but has spaces, don't emit a diagnostic. + if (isspace(C)) { + const char *ForwardPtr = CurPtr; + while (isspace(*ForwardPtr)) // Skip whitespace. + ++ForwardPtr; + if (ForwardPtr[0] == '/' && ForwardPtr[1] == '/') + break; + } + + Diag(OldPtr-1, diag::ext_multi_line_bcpl_comment); + break; + } + } + + if (CurPtr == BufferEnd+1) { --CurPtr; break; } + } while (C != '\n' && C != '\r'); + + // Found but did not consume the newline. + + // If we are returning comments as tokens, return this comment as a token. + if (KeepCommentMode) + return SaveBCPLComment(Result, CurPtr); + + // If we are inside a preprocessor directive and we see the end of line, + // return immediately, so that the lexer can return this as an EOM token. + if (ParsingPreprocessorDirective || CurPtr == BufferEnd) { + BufferPtr = CurPtr; + return true; + } + + // Otherwise, eat the \n character. We don't care if this is a \n\r or + // \r\n sequence. + ++CurPtr; + + // The next returned token is at the start of the line. + Result.setFlag(LexerToken::StartOfLine); + // No leading whitespace seen so far. 
+ Result.clearFlag(LexerToken::LeadingSpace); + + // It is common for the tokens immediately after a // comment to be + // whitespace (indentation for the next line). Instead of going through the + // big switch, handle it efficiently now. + if (isWhitespace(*CurPtr)) { + Result.setFlag(LexerToken::LeadingSpace); + SkipWhitespace(Result, CurPtr+1); + return true; + } + + BufferPtr = CurPtr; + return true; +} + +/// SaveBCPLComment - If in save-comment mode, package up this BCPL comment in +/// an appropriate way and return it. +bool Lexer::SaveBCPLComment(LexerToken &Result, const char *CurPtr) { + Result.setKind(tok::comment); + FormTokenWithChars(Result, CurPtr); + + // If this BCPL-style comment is in a macro definition, transmogrify it into + // a C-style block comment. + if (ParsingPreprocessorDirective) { + std::string Spelling = PP.getSpelling(Result); + assert(Spelling[0] == '/' && Spelling[1] == '/' && "Not bcpl comment?"); + Spelling[1] = '*'; // Change prefix to "/*". + Spelling += "*/"; // add suffix. + + Result.setLocation(PP.CreateString(&Spelling[0], Spelling.size(), + Result.getLocation())); + Result.setLength(Spelling.size()); + } + return false; +} + +/// isBlockCommentEndOfEscapedNewLine - Return true if the specified newline +/// character (either \n or \r) is part of an escaped newline sequence. Issue a +/// diagnostic if so. We know that the is inside of a block comment. +static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, + Lexer *L) { + assert(CurPtr[0] == '\n' || CurPtr[0] == '\r'); + + // Back up off the newline. + --CurPtr; + + // If this is a two-character newline sequence, skip the other character. + if (CurPtr[0] == '\n' || CurPtr[0] == '\r') { + // \n\n or \r\r -> not escaped newline. + if (CurPtr[0] == CurPtr[1]) + return false; + // \n\r or \r\n -> skip the newline. + --CurPtr; + } + + // If we have horizontal whitespace, skip over it. We allow whitespace + // between the slash and newline. 
+ bool HasSpace = false; + while (isHorizontalWhitespace(*CurPtr) || *CurPtr == 0) { + --CurPtr; + HasSpace = true; + } + + // If we have a slash, we know this is an escaped newline. + if (*CurPtr == '\\') { + if (CurPtr[-1] != '*') return false; + } else { + // It isn't a slash, is it the ?? / trigraph? + if (CurPtr[0] != '/' || CurPtr[-1] != '?' || CurPtr[-2] != '?' || + CurPtr[-3] != '*') + return false; + + // This is the trigraph ending the comment. Emit a stern warning! + CurPtr -= 2; + + // If no trigraphs are enabled, warn that we ignored this trigraph and + // ignore this * character. + if (!L->getFeatures().Trigraphs) { + L->Diag(CurPtr, diag::trigraph_ignored_block_comment); + return false; + } + L->Diag(CurPtr, diag::trigraph_ends_block_comment); + } + + // Warn about having an escaped newline between the */ characters. + L->Diag(CurPtr, diag::escaped_newline_block_comment_end); + + // If there was space between the backslash and newline, warn about it. + if (HasSpace) L->Diag(CurPtr, diag::backslash_newline_space); + + return true; +} + +#ifdef __SSE2__ +#include <emmintrin.h> +#elif __ALTIVEC__ +#include <altivec.h> +#undef bool +#endif + +/// SkipBlockComment - We have just read the /* characters from input. Read +/// until we find the */ characters that terminate the comment. Note that we +/// don't bother decoding trigraphs or escaped newlines in block comments, +/// because they cannot cause the comment to end. The only thing that can +/// happen is the comment could end with an escaped newline between the */ end +/// of comment. +bool Lexer::SkipBlockComment(LexerToken &Result, const char *CurPtr) { + // Scan one character past where we should, looking for a '/' character. Once + // we find it, check to see if it was preceeded by a *. This common + // optimization helps people who like to put a lot of * characters in their + // comments. 
+ unsigned char C = *CurPtr++; + if (C == 0 && CurPtr == BufferEnd+1) { + Diag(BufferPtr, diag::err_unterminated_block_comment); + BufferPtr = CurPtr-1; + return true; + } + + while (1) { + // Skip over all non-interesting characters until we find end of buffer or a + // (probably ending) '/' character. + if (CurPtr + 24 < BufferEnd) { + // While not aligned to a 16-byte boundary. + while (C != '/' && ((intptr_t)CurPtr & 0x0F) != 0) + C = *CurPtr++; + + if (C == '/') goto FoundSlash; + +#ifdef __SSE2__ + __m128i Slashes = _mm_set_epi8('/', '/', '/', '/', '/', '/', '/', '/', + '/', '/', '/', '/', '/', '/', '/', '/'); + while (CurPtr+16 <= BufferEnd && + _mm_movemask_epi8(_mm_cmpeq_epi8(*(__m128i*)CurPtr, Slashes)) == 0) + CurPtr += 16; +#elif __ALTIVEC__ + __vector unsigned char Slashes = { + '/', '/', '/', '/', '/', '/', '/', '/', + '/', '/', '/', '/', '/', '/', '/', '/' + }; + while (CurPtr+16 <= BufferEnd && + !vec_any_eq(*(vector unsigned char*)CurPtr, Slashes)) + CurPtr += 16; +#else + // Scan for '/' quickly. Many block comments are very large. + while (CurPtr[0] != '/' && + CurPtr[1] != '/' && + CurPtr[2] != '/' && + CurPtr[3] != '/' && + CurPtr+4 < BufferEnd) { + CurPtr += 4; + } +#endif + + // It has to be one of the bytes scanned, increment to it and read one. + C = *CurPtr++; + } + + // Loop to scan the remainder. + while (C != '/' && C != '\0') + C = *CurPtr++; + + FoundSlash: + if (C == '/') { + if (CurPtr[-2] == '*') // We found the final */. We're done! + break; + + if ((CurPtr[-2] == '\n' || CurPtr[-2] == '\r')) { + if (isEndOfBlockCommentWithEscapedNewLine(CurPtr-2, this)) { + // We found the final */, though it had an escaped newline between the + // * and /. We're done! + break; + } + } + if (CurPtr[0] == '*' && CurPtr[1] != '/') { + // If this is a /* inside of the comment, emit a warning. Don't do this + // if this is a /*/, which will end the comment. This misses cases with + // embedded escaped newlines, but oh well. 
+ Diag(CurPtr-1, diag::nested_block_comment); + } + } else if (C == 0 && CurPtr == BufferEnd+1) { + Diag(BufferPtr, diag::err_unterminated_block_comment); + // Note: the user probably forgot a */. We could continue immediately + // after the /*, but this would involve lexing a lot of what really is the + // comment, which surely would confuse the parser. + BufferPtr = CurPtr-1; + return true; + } + C = *CurPtr++; + } + + // If we are returning comments as tokens, return this comment as a token. + if (KeepCommentMode) { + Result.setKind(tok::comment); + FormTokenWithChars(Result, CurPtr); + return false; + } + + // It is common for the tokens immediately after a /**/ comment to be + // whitespace. Instead of going through the big switch, handle it + // efficiently now. + if (isHorizontalWhitespace(*CurPtr)) { + Result.setFlag(LexerToken::LeadingSpace); + SkipWhitespace(Result, CurPtr+1); + return true; + } + + // Otherwise, just return so that the next character will be lexed as a token. + BufferPtr = CurPtr; + Result.setFlag(LexerToken::LeadingSpace); + return true; +} + +//===----------------------------------------------------------------------===// +// Primary Lexing Entry Points +//===----------------------------------------------------------------------===// + +/// LexIncludeFilename - After the preprocessor has parsed a #include, lex and +/// (potentially) macro expand the filename. +void Lexer::LexIncludeFilename(LexerToken &FilenameTok) { + assert(ParsingPreprocessorDirective && + ParsingFilename == false && + "Must be in a preprocessing directive!"); + + // We are now parsing a filename! + ParsingFilename = true; + + // Lex the filename. + Lex(FilenameTok); + + // We should have obtained the filename now. + ParsingFilename = false; + + // No filename? 
+ if (FilenameTok.getKind() == tok::eom) + Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename); +} + +/// ReadToEndOfLine - Read the rest of the current preprocessor line as an +/// uninterpreted string. This switches the lexer out of directive mode. +std::string Lexer::ReadToEndOfLine() { + assert(ParsingPreprocessorDirective && ParsingFilename == false && + "Must be in a preprocessing directive!"); + std::string Result; + LexerToken Tmp; + + // CurPtr - Cache BufferPtr in an automatic variable. + const char *CurPtr = BufferPtr; + while (1) { + char Char = getAndAdvanceChar(CurPtr, Tmp); + switch (Char) { + default: + Result += Char; + break; + case 0: // Null. + // Found end of file? + if (CurPtr-1 != BufferEnd) { + // Nope, normal character, continue. + Result += Char; + break; + } + // FALL THROUGH. + case '\r': + case '\n': + // Okay, we found the end of the line. First, back up past the \0, \r, \n. + assert(CurPtr[-1] == Char && "Trigraphs for newline?"); + BufferPtr = CurPtr-1; + + // Next, lex the character, which should handle the EOM transition. + Lex(Tmp); + assert(Tmp.getKind() == tok::eom && "Unexpected token!"); + + // Finally, we're done, return the string we found. + return Result; + } + } +} + +/// LexEndOfFile - CurPtr points to the end of this file. Handle this +/// condition, reporting diagnostics and handling other edge cases as required. +/// This returns true if Result contains a token, false if PP.Lex should be +/// called again. +bool Lexer::LexEndOfFile(LexerToken &Result, const char *CurPtr) { + // If we hit the end of the file while parsing a preprocessor directive, + // end the preprocessor directive first. The next token returned will + // then be the end of file. + if (ParsingPreprocessorDirective) { + // Done parsing the "line". + ParsingPreprocessorDirective = false; + Result.setKind(tok::eom); + // Update the location of token as well as BufferPtr. 
+ FormTokenWithChars(Result, CurPtr); + + // Restore comment saving mode, in case it was disabled for directive. + KeepCommentMode = PP.getCommentRetentionState(); + return true; // Have a token. + } + + // If we are in raw mode, return this event as an EOF token. Let the caller + // that put us in raw mode handle the event. + if (LexingRawMode) { + Result.startToken(); + BufferPtr = BufferEnd; + FormTokenWithChars(Result, BufferEnd); + Result.setKind(tok::eof); + return true; + } + + // Otherwise, issue diagnostics for unterminated #if and missing newline. + + // If we are in a #if directive, emit an error. + while (!ConditionalStack.empty()) { + Diag(ConditionalStack.back().IfLoc, diag::err_pp_unterminated_conditional); + ConditionalStack.pop_back(); + } + + // If the file was empty or didn't end in a newline, issue a pedwarn. + if (CurPtr[-1] != '\n' && CurPtr[-1] != '\r') + Diag(BufferEnd, diag::ext_no_newline_eof); + + BufferPtr = CurPtr; + + // Finally, let the preprocessor handle this. + return PP.HandleEndOfFile(Result); +} + +/// isNextPPTokenLParen - Return 1 if the next unexpanded token lexed from +/// the specified lexer will return a tok::l_paren token, 0 if it is something +/// else and 2 if there are no more tokens in the buffer controlled by the +/// lexer. +unsigned Lexer::isNextPPTokenLParen() { + assert(!LexingRawMode && "How can we expand a macro from a skipping buffer?"); + + // Switch to 'skipping' mode. This will ensure that we can lex a token + // without emitting diagnostics, disables macro expansion, and will cause EOF + // to return an EOF token instead of popping the include stack. + LexingRawMode = true; + + // Save state that can be changed while lexing so that we can restore it. + const char *TmpBufferPtr = BufferPtr; + + LexerToken Tok; + Tok.startToken(); + LexTokenInternal(Tok); + + // Restore state that may have changed. + BufferPtr = TmpBufferPtr; + + // Restore the lexer back to non-skipping mode. 
+ LexingRawMode = false; + + if (Tok.getKind() == tok::eof) + return 2; + return Tok.getKind() == tok::l_paren; +} + + +/// LexTokenInternal - This implements a simple C family lexer. It is an +/// extremely performance critical piece of code. This assumes that the buffer +/// has a null character at the end of the file. Return true if an error +/// occurred and compilation should terminate, false if normal. This returns a +/// preprocessing token, not a normal token, as such, it is an internal +/// interface. It assumes that the Flags of result have been cleared before +/// calling this. +void Lexer::LexTokenInternal(LexerToken &Result) { +LexNextToken: + // New token, can't need cleaning yet. + Result.clearFlag(LexerToken::NeedsCleaning); + Result.setIdentifierInfo(0); + + // CurPtr - Cache BufferPtr in an automatic variable. + const char *CurPtr = BufferPtr; + + // Small amounts of horizontal whitespace is very common between tokens. + if ((*CurPtr == ' ') || (*CurPtr == '\t')) { + ++CurPtr; + while ((*CurPtr == ' ') || (*CurPtr == '\t')) + ++CurPtr; + BufferPtr = CurPtr; + Result.setFlag(LexerToken::LeadingSpace); + } + + unsigned SizeTmp, SizeTmp2; // Temporaries for use in cases below. + + // Read a character, advancing over it. + char Char = getAndAdvanceChar(CurPtr, Result); + switch (Char) { + case 0: // Null. + // Found end of file? + if (CurPtr-1 == BufferEnd) { + // Read the PP instance variable into an automatic variable, because + // LexEndOfFile will often delete 'this'. + Preprocessor &PPCache = PP; + if (LexEndOfFile(Result, CurPtr-1)) // Retreat back into the file. + return; // Got a token to return. + return PPCache.Lex(Result); + } + + Diag(CurPtr-1, diag::null_in_file); + Result.setFlag(LexerToken::LeadingSpace); + SkipWhitespace(Result, CurPtr); + goto LexNextToken; // GCC isn't tail call eliminating. 
+ case '\n': + case '\r': + // If we are inside a preprocessor directive and we see the end of line, + // we know we are done with the directive, so return an EOM token. + if (ParsingPreprocessorDirective) { + // Done parsing the "line". + ParsingPreprocessorDirective = false; + + // Restore comment saving mode, in case it was disabled for directive. + KeepCommentMode = PP.getCommentRetentionState(); + + // Since we consumed a newline, we are back at the start of a line. + IsAtStartOfLine = true; + + Result.setKind(tok::eom); + break; + } + // The returned token is at the start of the line. + Result.setFlag(LexerToken::StartOfLine); + // No leading whitespace seen so far. + Result.clearFlag(LexerToken::LeadingSpace); + SkipWhitespace(Result, CurPtr); + goto LexNextToken; // GCC isn't tail call eliminating. + case ' ': + case '\t': + case '\f': + case '\v': + Result.setFlag(LexerToken::LeadingSpace); + SkipWhitespace(Result, CurPtr); + goto LexNextToken; // GCC isn't tail call eliminating. + + case 'L': + // Notify MIOpt that we read a non-whitespace/non-comment token. + MIOpt.ReadToken(); + Char = getCharAndSize(CurPtr, SizeTmp); + + // Wide string literal. + if (Char == '"') + return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result), + true); + + // Wide character constant. + if (Char == '\'') + return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result)); + // FALL THROUGH, treating L like the start of an identifier. + + // C99 6.4.2: Identifiers. 
+ case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': + case 'H': case 'I': case 'J': case 'K': /*'L'*/case 'M': case 'N': + case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': + case 'V': case 'W': case 'X': case 'Y': case 'Z': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': + case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': + case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': + case 'v': case 'w': case 'x': case 'y': case 'z': + case '_': + // Notify MIOpt that we read a non-whitespace/non-comment token. + MIOpt.ReadToken(); + return LexIdentifier(Result, CurPtr); + + // C99 6.4.4.1: Integer Constants. + // C99 6.4.4.2: Floating Constants. + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + // Notify MIOpt that we read a non-whitespace/non-comment token. + MIOpt.ReadToken(); + return LexNumericConstant(Result, CurPtr); + + // C99 6.4.4: Character Constants. + case '\'': + // Notify MIOpt that we read a non-whitespace/non-comment token. + MIOpt.ReadToken(); + return LexCharConstant(Result, CurPtr); + + // C99 6.4.5: String Literals. + case '"': + // Notify MIOpt that we read a non-whitespace/non-comment token. + MIOpt.ReadToken(); + return LexStringLiteral(Result, CurPtr, false); + + // C99 6.4.6: Punctuators. + case '?': + Result.setKind(tok::question); + break; + case '[': + Result.setKind(tok::l_square); + break; + case ']': + Result.setKind(tok::r_square); + break; + case '(': + Result.setKind(tok::l_paren); + break; + case ')': + Result.setKind(tok::r_paren); + break; + case '{': + Result.setKind(tok::l_brace); + break; + case '}': + Result.setKind(tok::r_brace); + break; + case '.': + Char = getCharAndSize(CurPtr, SizeTmp); + if (Char >= '0' && Char <= '9') { + // Notify MIOpt that we read a non-whitespace/non-comment token. 
+ MIOpt.ReadToken(); + + return LexNumericConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result)); + } else if (Features.CPlusPlus && Char == '*') { + Result.setKind(tok::periodstar); + CurPtr += SizeTmp; + } else if (Char == '.' && + getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '.') { + Result.setKind(tok::ellipsis); + CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), + SizeTmp2, Result); + } else { + Result.setKind(tok::period); + } + break; + case '&': + Char = getCharAndSize(CurPtr, SizeTmp); + if (Char == '&') { + Result.setKind(tok::ampamp); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else if (Char == '=') { + Result.setKind(tok::ampequal); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else { + Result.setKind(tok::amp); + } + break; + case '*': + if (getCharAndSize(CurPtr, SizeTmp) == '=') { + Result.setKind(tok::starequal); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else { + Result.setKind(tok::star); + } + break; + case '+': + Char = getCharAndSize(CurPtr, SizeTmp); + if (Char == '+') { + Result.setKind(tok::plusplus); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else if (Char == '=') { + Result.setKind(tok::plusequal); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else { + Result.setKind(tok::plus); + } + break; + case '-': + Char = getCharAndSize(CurPtr, SizeTmp); + if (Char == '-') { + Result.setKind(tok::minusminus); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else if (Char == '>' && Features.CPlusPlus && + getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '*') { + Result.setKind(tok::arrowstar); // C++ ->* + CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), + SizeTmp2, Result); + } else if (Char == '>') { + Result.setKind(tok::arrow); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else if (Char == '=') { + Result.setKind(tok::minusequal); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else { + Result.setKind(tok::minus); + } + break; + case '~': + Result.setKind(tok::tilde); + 
break; + case '!': + if (getCharAndSize(CurPtr, SizeTmp) == '=') { + Result.setKind(tok::exclaimequal); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else { + Result.setKind(tok::exclaim); + } + break; + case '/': + // 6.4.9: Comments + Char = getCharAndSize(CurPtr, SizeTmp); + if (Char == '/') { // BCPL comment. + if (SkipBCPLComment(Result, ConsumeChar(CurPtr, SizeTmp, Result))) + goto LexNextToken; // GCC isn't tail call eliminating. + return; // KeepCommentMode + } else if (Char == '*') { // /**/ comment. + if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result))) + goto LexNextToken; // GCC isn't tail call eliminating. + return; // KeepCommentMode + } else if (Char == '=') { + Result.setKind(tok::slashequal); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else { + Result.setKind(tok::slash); + } + break; + case '%': + Char = getCharAndSize(CurPtr, SizeTmp); + if (Char == '=') { + Result.setKind(tok::percentequal); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else if (Features.Digraphs && Char == '>') { + Result.setKind(tok::r_brace); // '%>' -> '}' + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else if (Features.Digraphs && Char == ':') { + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + Char = getCharAndSize(CurPtr, SizeTmp); + if (Char == '%' && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == ':') { + Result.setKind(tok::hashhash); // '%:%:' -> '##' + CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), + SizeTmp2, Result); + } else if (Char == '@' && Features.Microsoft) { // %:@ -> #@ -> Charize + Result.setKind(tok::hashat); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + Diag(BufferPtr, diag::charize_microsoft_ext); + } else { + Result.setKind(tok::hash); // '%:' -> '#' + + // We parsed a # character. If this occurs at the start of the line, + // it's actually the start of a preprocessing directive. Callback to + // the preprocessor to handle it. + // FIXME: -fpreprocessed mode?? 
+ if (Result.isAtStartOfLine() && !LexingRawMode) { + BufferPtr = CurPtr; + PP.HandleDirective(Result); + + // As an optimization, if the preprocessor didn't switch lexers, tail + // recurse. + if (PP.isCurrentLexer(this)) { + // Start a new token. If this is a #include or something, the PP may + // want us starting at the beginning of the line again. If so, set + // the StartOfLine flag. + if (IsAtStartOfLine) { + Result.setFlag(LexerToken::StartOfLine); + IsAtStartOfLine = false; + } + goto LexNextToken; // GCC isn't tail call eliminating. + } + + return PP.Lex(Result); + } + } + } else { + Result.setKind(tok::percent); + } + break; + case '<': + Char = getCharAndSize(CurPtr, SizeTmp); + if (ParsingFilename) { + return LexAngledStringLiteral(Result, CurPtr+SizeTmp); + } else if (Char == '<' && + getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '=') { + Result.setKind(tok::lesslessequal); + CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), + SizeTmp2, Result); + } else if (Char == '<') { + Result.setKind(tok::lessless); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else if (Char == '=') { + Result.setKind(tok::lessequal); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else if (Features.Digraphs && Char == ':') { + Result.setKind(tok::l_square); // '<:' -> '[' + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else if (Features.Digraphs && Char == '>') { + Result.setKind(tok::l_brace); // '<%' -> '{' + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else { + Result.setKind(tok::less); + } + break; + case '>': + Char = getCharAndSize(CurPtr, SizeTmp); + if (Char == '=') { + Result.setKind(tok::greaterequal); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else if (Char == '>' && + getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '=') { + Result.setKind(tok::greatergreaterequal); + CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), + SizeTmp2, Result); + } else if (Char == '>') { + Result.setKind(tok::greatergreater); + CurPtr = 
ConsumeChar(CurPtr, SizeTmp, Result); + } else { + Result.setKind(tok::greater); + } + break; + case '^': + Char = getCharAndSize(CurPtr, SizeTmp); + if (Char == '=') { + Result.setKind(tok::caretequal); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else { + Result.setKind(tok::caret); + } + break; + case '|': + Char = getCharAndSize(CurPtr, SizeTmp); + if (Char == '=') { + Result.setKind(tok::pipeequal); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else if (Char == '|') { + Result.setKind(tok::pipepipe); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else { + Result.setKind(tok::pipe); + } + break; + case ':': + Char = getCharAndSize(CurPtr, SizeTmp); + if (Features.Digraphs && Char == '>') { + Result.setKind(tok::r_square); // ':>' -> ']' + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else if (Features.CPlusPlus && Char == ':') { + Result.setKind(tok::coloncolon); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else { + Result.setKind(tok::colon); + } + break; + case ';': + Result.setKind(tok::semi); + break; + case '=': + Char = getCharAndSize(CurPtr, SizeTmp); + if (Char == '=') { + Result.setKind(tok::equalequal); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else { + Result.setKind(tok::equal); + } + break; + case ',': + Result.setKind(tok::comma); + break; + case '#': + Char = getCharAndSize(CurPtr, SizeTmp); + if (Char == '#') { + Result.setKind(tok::hashhash); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else if (Char == '@' && Features.Microsoft) { // #@ -> Charize + Result.setKind(tok::hashat); + Diag(BufferPtr, diag::charize_microsoft_ext); + CurPtr = ConsumeChar(CurPtr, SizeTmp, Result); + } else { + Result.setKind(tok::hash); + // We parsed a # character. If this occurs at the start of the line, + // it's actually the start of a preprocessing directive. Callback to + // the preprocessor to handle it. + // FIXME: -fpreprocessed mode?? 
+ if (Result.isAtStartOfLine() && !LexingRawMode) { + BufferPtr = CurPtr; + PP.HandleDirective(Result); + + // As an optimization, if the preprocessor didn't switch lexers, tail + // recurse. + if (PP.isCurrentLexer(this)) { + // Start a new token. If this is a #include or something, the PP may + // want us starting at the beginning of the line again. If so, set + // the StartOfLine flag. + if (IsAtStartOfLine) { + Result.setFlag(LexerToken::StartOfLine); + IsAtStartOfLine = false; + } + goto LexNextToken; // GCC isn't tail call eliminating. + } + return PP.Lex(Result); + } + } + break; + + case '\\': + // FIXME: UCN's. + // FALL THROUGH. + default: + // Objective C support. + if (CurPtr[-1] == '@' && Features.ObjC1) { + Result.setKind(tok::at); + break; + } else if (CurPtr[-1] == '$' && Features.DollarIdents) {// $ in identifiers. + Diag(CurPtr-1, diag::ext_dollar_in_identifier); + // Notify MIOpt that we read a non-whitespace/non-comment token. + MIOpt.ReadToken(); + return LexIdentifier(Result, CurPtr); + } + + Result.setKind(tok::unknown); + break; + } + + // Notify MIOpt that we read a non-whitespace/non-comment token. + MIOpt.ReadToken(); + + // Update the location of token as well as BufferPtr. + FormTokenWithChars(Result, CurPtr); +} diff --git a/Lex/LiteralSupport.cpp b/Lex/LiteralSupport.cpp new file mode 100644 index 0000000000..5d9c7bd11e --- /dev/null +++ b/Lex/LiteralSupport.cpp @@ -0,0 +1,661 @@ +//===--- LiteralSupport.cpp - Code to parse and process literals ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Steve Naroff and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the NumericLiteralParser, CharLiteralParser, and +// StringLiteralParser interfaces. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/LiteralSupport.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/Basic/TargetInfo.h" +#include "clang/Basic/Diagnostic.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/StringExtras.h" +using namespace clang; + +/// HexDigitValue - Return the value of the specified hex digit, or -1 if it's +/// not valid. +static int HexDigitValue(char C) { + if (C >= '0' && C <= '9') return C-'0'; + if (C >= 'a' && C <= 'f') return C-'a'+10; + if (C >= 'A' && C <= 'F') return C-'A'+10; + return -1; +} + +/// ProcessCharEscape - Parse a standard C escape sequence, which can occur in +/// either a character or a string literal. +static unsigned ProcessCharEscape(const char *&ThisTokBuf, + const char *ThisTokEnd, bool &HadError, + SourceLocation Loc, bool IsWide, + Preprocessor &PP) { + // Skip the '\' char. + ++ThisTokBuf; + + // We know that this character can't be off the end of the buffer, because + // that would have been \", which would not have been the end of string. + unsigned ResultChar = *ThisTokBuf++; + switch (ResultChar) { + // These map to themselves. + case '\\': case '\'': case '"': case '?': break; + + // These have fixed mappings. + case 'a': + // TODO: K&R: the meaning of '\\a' is different in traditional C + ResultChar = 7; + break; + case 'b': + ResultChar = 8; + break; + case 'e': + PP.Diag(Loc, diag::ext_nonstandard_escape, "e"); + ResultChar = 27; + break; + case 'f': + ResultChar = 12; + break; + case 'n': + ResultChar = 10; + break; + case 'r': + ResultChar = 13; + break; + case 't': + ResultChar = 9; + break; + case 'v': + ResultChar = 11; + break; + + //case 'u': case 'U': // FIXME: UCNs. + case 'x': { // Hex escape. + ResultChar = 0; + if (ThisTokBuf == ThisTokEnd || !isxdigit(*ThisTokBuf)) { + PP.Diag(Loc, diag::err_hex_escape_no_digits); + HadError = 1; + break; + } + + // Hex escapes are a maximal series of hex digits. 
+ bool Overflow = false; + for (; ThisTokBuf != ThisTokEnd; ++ThisTokBuf) { + int CharVal = HexDigitValue(ThisTokBuf[0]); + if (CharVal == -1) break; + Overflow |= ResultChar & 0xF0000000; // About to shift out a digit? + ResultChar <<= 4; + ResultChar |= CharVal; + } + + // See if any bits will be truncated when evaluated as a character. + unsigned CharWidth = IsWide ? PP.getTargetInfo().getWCharWidth(Loc) + : PP.getTargetInfo().getCharWidth(Loc); + if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) { + Overflow = true; + ResultChar &= ~0U >> (32-CharWidth); + } + + // Check for overflow. + if (Overflow) // Too many digits to fit in + PP.Diag(Loc, diag::warn_hex_escape_too_large); + break; + } + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': { + // Octal escapes. + --ThisTokBuf; + ResultChar = 0; + + // Octal escapes are a series of octal digits with maximum length 3. + // "\0123" is a two digit sequence equal to "\012" "3". + unsigned NumDigits = 0; + do { + ResultChar <<= 3; + ResultChar |= *ThisTokBuf++ - '0'; + ++NumDigits; + } while (ThisTokBuf != ThisTokEnd && NumDigits < 3 && + ThisTokBuf[0] >= '0' && ThisTokBuf[0] <= '7'); + + // Check for overflow. Reject '\777', but not L'\777'. + unsigned CharWidth = IsWide ? PP.getTargetInfo().getWCharWidth(Loc) + : PP.getTargetInfo().getCharWidth(Loc); + if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) { + PP.Diag(Loc, diag::warn_octal_escape_too_large); + ResultChar &= ~0U >> (32-CharWidth); + } + break; + } + + // Otherwise, these are not valid escapes. + case '(': case '{': case '[': case '%': + // GCC accepts these as extensions. We warn about them as such though. + if (!PP.getLangOptions().NoExtensions) { + PP.Diag(Loc, diag::ext_nonstandard_escape, + std::string()+(char)ResultChar); + break; + } + // FALL THROUGH. 
+ default: + if (isgraph(ThisTokBuf[0])) { + PP.Diag(Loc, diag::ext_unknown_escape, std::string()+(char)ResultChar); + } else { + PP.Diag(Loc, diag::ext_unknown_escape, "x"+llvm::utohexstr(ResultChar)); + } + break; + } + + return ResultChar; +} + + + + +/// integer-constant: [C99 6.4.4.1] +/// decimal-constant integer-suffix +/// octal-constant integer-suffix +/// hexadecimal-constant integer-suffix +/// decimal-constant: +/// nonzero-digit +/// decimal-constant digit +/// octal-constant: +/// 0 +/// octal-constant octal-digit +/// hexadecimal-constant: +/// hexadecimal-prefix hexadecimal-digit +/// hexadecimal-constant hexadecimal-digit +/// hexadecimal-prefix: one of +/// 0x 0X +/// integer-suffix: +/// unsigned-suffix [long-suffix] +/// unsigned-suffix [long-long-suffix] +/// long-suffix [unsigned-suffix] +/// long-long-suffix [unsigned-sufix] +/// nonzero-digit: +/// 1 2 3 4 5 6 7 8 9 +/// octal-digit: +/// 0 1 2 3 4 5 6 7 +/// hexadecimal-digit: +/// 0 1 2 3 4 5 6 7 8 9 +/// a b c d e f +/// A B C D E F +/// unsigned-suffix: one of +/// u U +/// long-suffix: one of +/// l L +/// long-long-suffix: one of +/// ll LL +/// +/// floating-constant: [C99 6.4.4.2] +/// TODO: add rules... +/// + +NumericLiteralParser:: +NumericLiteralParser(const char *begin, const char *end, + SourceLocation TokLoc, Preprocessor &pp) + : PP(pp), ThisTokBegin(begin), ThisTokEnd(end) { + s = DigitsBegin = begin; + saw_exponent = false; + saw_period = false; + saw_float_suffix = false; + isLong = false; + isUnsigned = false; + isLongLong = false; + hadError = false; + + if (*s == '0') { // parse radix + s++; + if ((*s == 'x' || *s == 'X') && (isxdigit(s[1]) || s[1] == '.')) { + s++; + radix = 16; + DigitsBegin = s; + s = SkipHexDigits(s); + if (s == ThisTokEnd) { + // Done. + } else if (*s == '.') { + s++; + saw_period = true; + s = SkipHexDigits(s); + } + // A binary exponent can appear with or with a '.'. If dotted, the + // binary exponent is required. 
+ if (*s == 'p' || *s == 'P') { + s++; + saw_exponent = true; + if (*s == '+' || *s == '-') s++; // sign + const char *first_non_digit = SkipDigits(s); + if (first_non_digit == s) { + Diag(TokLoc, diag::err_exponent_has_no_digits); + return; + } else { + s = first_non_digit; + } + } else if (saw_period) { + Diag(TokLoc, diag::err_hexconstant_requires_exponent); + return; + } + } else if (*s == 'b' || *s == 'B') { + // 0b101010 is a GCC extension. + ++s; + radix = 2; + DigitsBegin = s; + s = SkipBinaryDigits(s); + if (s == ThisTokEnd) { + // Done. + } else if (isxdigit(*s)) { + Diag(TokLoc, diag::err_invalid_binary_digit, std::string(s, s+1)); + return; + } + PP.Diag(TokLoc, diag::ext_binary_literal); + } else { + // For now, the radix is set to 8. If we discover that we have a + // floating point constant, the radix will change to 10. Octal floating + // point constants are not permitted (only decimal and hexadecimal). + radix = 8; + DigitsBegin = s; + s = SkipOctalDigits(s); + if (s == ThisTokEnd) { + // Done. + } else if (isxdigit(*s)) { + Diag(TokLoc, diag::err_invalid_octal_digit, std::string(s, s+1)); + return; + } else if (*s == '.') { + s++; + radix = 10; + saw_period = true; + s = SkipDigits(s); + } + if (*s == 'e' || *s == 'E') { // exponent + s++; + radix = 10; + saw_exponent = true; + if (*s == '+' || *s == '-') s++; // sign + const char *first_non_digit = SkipDigits(s); + if (first_non_digit == s) { + Diag(TokLoc, diag::err_exponent_has_no_digits); + return; + } else { + s = first_non_digit; + } + } + } + } else { // the first digit is non-zero + radix = 10; + s = SkipDigits(s); + if (s == ThisTokEnd) { + // Done. 
+ } else if (isxdigit(*s)) { + Diag(TokLoc, diag::err_invalid_decimal_digit, std::string(s, s+1)); + return; + } else if (*s == '.') { + s++; + saw_period = true; + s = SkipDigits(s); + } + if (*s == 'e' || *s == 'E') { // exponent + s++; + saw_exponent = true; + if (*s == '+' || *s == '-') s++; // sign + const char *first_non_digit = SkipDigits(s); + if (first_non_digit == s) { + Diag(TokLoc, diag::err_exponent_has_no_digits); + return; + } else { + s = first_non_digit; + } + } + } + + SuffixBegin = s; + + if (saw_period || saw_exponent) { + if (s < ThisTokEnd) { // parse size suffix (float, long double) + if (*s == 'f' || *s == 'F') { + saw_float_suffix = true; + s++; + } else if (*s == 'l' || *s == 'L') { + isLong = true; + s++; + } + if (s != ThisTokEnd) { + Diag(TokLoc, diag::err_invalid_suffix_float_constant, + std::string(SuffixBegin, ThisTokEnd)); + return; + } + } + } else { + if (s < ThisTokEnd) { + // parse int suffix - they can appear in any order ("ul", "lu", "llu"). + if (*s == 'u' || *s == 'U') { + s++; + isUnsigned = true; // unsigned + + if ((s < ThisTokEnd) && (*s == 'l' || *s == 'L')) { + s++; + // handle "long long" type - l's need to be adjacent and same case. + if ((s < ThisTokEnd) && (*s == *(s-1))) { + isLongLong = true; // unsigned long long + s++; + } else { + isLong = true; // unsigned long + } + } + } else if (*s == 'l' || *s == 'L') { + s++; + // handle "long long" types - l's need to be adjacent and same case. 
+ if ((s < ThisTokEnd) && (*s == *(s-1))) { + s++; + if ((s < ThisTokEnd) && (*s == 'u' || *s == 'U')) { + isUnsigned = true; // unsigned long long + s++; + } else { + isLongLong = true; // long long + } + } else { // handle "long" types + if ((s < ThisTokEnd) && (*s == 'u' || *s == 'U')) { + isUnsigned = true; // unsigned long + s++; + } else { + isLong = true; // long + } + } + } + if (s != ThisTokEnd) { + Diag(TokLoc, diag::err_invalid_suffix_integer_constant, + std::string(SuffixBegin, ThisTokEnd)); + return; + } + } + } +} + +/// GetIntegerValue - Convert this numeric literal value to an APInt that +/// matches Val's input width. If there is an overflow, set Val to the low bits +/// of the result and return true. Otherwise, return false. +bool NumericLiteralParser::GetIntegerValue(llvm::APInt &Val) { + Val = 0; + s = DigitsBegin; + + llvm::APInt RadixVal(Val.getBitWidth(), radix); + llvm::APInt CharVal(Val.getBitWidth(), 0); + llvm::APInt OldVal = Val; + + bool OverflowOccurred = false; + while (s < SuffixBegin) { + unsigned C = HexDigitValue(*s++); + + // If this letter is out of bound for this radix, reject it. + assert(C < radix && "NumericLiteralParser ctor should have rejected this"); + + CharVal = C; + + // Add the digit to the value in the appropriate radix. If adding in digits + // made the value smaller, then this overflowed. + OldVal = Val; + + // Multiply by radix, did overflow occur on the multiply? + Val *= RadixVal; + OverflowOccurred |= Val.udiv(RadixVal) != OldVal; + + OldVal = Val; + // Add value, did overflow occur on the value? + Val += CharVal; + OverflowOccurred |= Val.ult(OldVal); + OverflowOccurred |= Val.ult(CharVal); + } + return OverflowOccurred; +} + +// GetFloatValue - Poor man's floatvalue (FIXME). 
+float NumericLiteralParser::GetFloatValue() { + char floatChars[256]; + strncpy(floatChars, ThisTokBegin, ThisTokEnd-ThisTokBegin); + floatChars[ThisTokEnd-ThisTokBegin] = '\0'; + return strtof(floatChars, 0); +} + +void NumericLiteralParser::Diag(SourceLocation Loc, unsigned DiagID, + const std::string &M) { + PP.Diag(Loc, DiagID, M); + hadError = true; +} + + +CharLiteralParser::CharLiteralParser(const char *begin, const char *end, + SourceLocation Loc, Preprocessor &PP) { + // At this point we know that the character matches the regex "L?'.*'". + HadError = false; + Value = 0; + + // Determine if this is a wide character. + IsWide = begin[0] == 'L'; + if (IsWide) ++begin; + + // Skip over the entry quote. + assert(begin[0] == '\'' && "Invalid token lexed"); + ++begin; + + // FIXME: This assumes that 'int' is 32-bits in overflow calculation, and the + // size of "value". + assert(PP.getTargetInfo().getIntWidth(Loc) == 32 && + "Assumes sizeof(int) == 4 for now"); + // FIXME: This assumes that wchar_t is 32-bits for now. + assert(PP.getTargetInfo().getWCharWidth(Loc) == 32 && + "Assumes sizeof(wchar_t) == 4 for now"); + // FIXME: This extensively assumes that 'char' is 8-bits. + assert(PP.getTargetInfo().getCharWidth(Loc) == 8 && + "Assumes char is 8 bits"); + + bool isFirstChar = true; + bool isMultiChar = false; + while (begin[0] != '\'') { + unsigned ResultChar; + if (begin[0] != '\\') // If this is a normal character, consume it. + ResultChar = *begin++; + else // Otherwise, this is an escape character. + ResultChar = ProcessCharEscape(begin, end, HadError, Loc, IsWide, PP); + + // If this is a multi-character constant (e.g. 'abc'), handle it. These are + // implementation defined (C99 6.4.4.4p10). + if (!isFirstChar) { + // If this is the second character being processed, do special handling. + if (!isMultiChar) { + isMultiChar = true; + + // Warn about discarding the top bits for multi-char wide-character + // constants (L'abcd'). 
+ if (IsWide) + PP.Diag(Loc, diag::warn_extraneous_wide_char_constant); + } + + if (IsWide) { + // Emulate GCC's (unintentional?) behavior: L'ab' -> L'b'. + Value = 0; + } else { + // Narrow character literals act as though their value is concatenated + // in this implementation. + if (((Value << 8) >> 8) != Value) + PP.Diag(Loc, diag::warn_char_constant_too_large); + Value <<= 8; + } + } + + Value += ResultChar; + isFirstChar = false; + } + + // If this is a single narrow character, sign extend it (e.g. '\xFF' is "-1") + // if 'char' is signed for this target (C99 6.4.4.4p10). Note that multiple + // character constants are not sign extended in the this implementation: + // '\xFF\xFF' = 65536 and '\x0\xFF' = 255, which matches GCC. + if (!IsWide && !isMultiChar && (Value & 128) && + PP.getTargetInfo().isCharSigned(Loc)) + Value = (signed char)Value; +} + + +/// string-literal: [C99 6.4.5] +/// " [s-char-sequence] " +/// L" [s-char-sequence] " +/// s-char-sequence: +/// s-char +/// s-char-sequence s-char +/// s-char: +/// any source character except the double quote ", +/// backslash \, or newline character +/// escape-character +/// universal-character-name +/// escape-character: [C99 6.4.4.4] +/// \ escape-code +/// universal-character-name +/// escape-code: +/// character-escape-code +/// octal-escape-code +/// hex-escape-code +/// character-escape-code: one of +/// n t b r f v a +/// \ ' " ? 
+/// octal-escape-code: +/// octal-digit +/// octal-digit octal-digit +/// octal-digit octal-digit octal-digit +/// hex-escape-code: +/// x hex-digit +/// hex-escape-code hex-digit +/// universal-character-name: +/// \u hex-quad +/// \U hex-quad hex-quad +/// hex-quad: +/// hex-digit hex-digit hex-digit hex-digit +/// +StringLiteralParser:: +StringLiteralParser(const LexerToken *StringToks, unsigned NumStringToks, + Preprocessor &pp, TargetInfo &t) + : PP(pp), Target(t) { + // Scan all of the string portions, remember the max individual token length, + // computing a bound on the concatenated string length, and see whether any + // piece is a wide-string. If any of the string portions is a wide-string + // literal, the result is a wide-string literal [C99 6.4.5p4]. + MaxTokenLength = StringToks[0].getLength(); + SizeBound = StringToks[0].getLength()-2; // -2 for "". + AnyWide = StringToks[0].getKind() == tok::wide_string_literal; + + hadError = false; + + // Implement Translation Phase #6: concatenation of string literals + /// (C99 5.1.1.2p1). The common case is only one string fragment. + for (unsigned i = 1; i != NumStringToks; ++i) { + // The string could be shorter than this if it needs cleaning, but this is a + // reasonable bound, which is all we need. + SizeBound += StringToks[i].getLength()-2; // -2 for "". + + // Remember maximum string piece length. + if (StringToks[i].getLength() > MaxTokenLength) + MaxTokenLength = StringToks[i].getLength(); + + // Remember if we see any wide strings. + AnyWide |= StringToks[i].getKind() == tok::wide_string_literal; + } + + + // Include space for the null terminator. + ++SizeBound; + + // TODO: K&R warning: "traditional C rejects string constant concatenation" + + // Get the width in bytes of wchar_t. If no wchar_t strings are used, do not + // query the target. As such, wchar_tByteWidth is only valid if AnyWide=true. 
+ wchar_tByteWidth = ~0U; + if (AnyWide) { + wchar_tByteWidth = Target.getWCharWidth(StringToks[0].getLocation()); + assert((wchar_tByteWidth & 7) == 0 && "Assumes wchar_t is byte multiple!"); + wchar_tByteWidth /= 8; + } + + // The output buffer size needs to be large enough to hold wide characters. + // This is a worst-case assumption which basically corresponds to L"" "long". + if (AnyWide) + SizeBound *= wchar_tByteWidth; + + // Size the temporary buffer to hold the result string data. + ResultBuf.resize(SizeBound); + + // Likewise, but for each string piece. + llvm::SmallString<512> TokenBuf; + TokenBuf.resize(MaxTokenLength); + + // Loop over all the strings, getting their spelling, and expanding them to + // wide strings as appropriate. + ResultPtr = &ResultBuf[0]; // Next byte to fill in. + + for (unsigned i = 0, e = NumStringToks; i != e; ++i) { + const char *ThisTokBuf = &TokenBuf[0]; + // Get the spelling of the token, which eliminates trigraphs, etc. We know + // that ThisTokBuf points to a buffer that is big enough for the whole token + // and 'spelled' tokens can only shrink. + unsigned ThisTokLen = PP.getSpelling(StringToks[i], ThisTokBuf); + const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1; // Skip end quote. + + // TODO: Input character set mapping support. + + // Skip L marker for wide strings. + bool ThisIsWide = false; + if (ThisTokBuf[0] == 'L') { + ++ThisTokBuf; + ThisIsWide = true; + } + + assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?"); + ++ThisTokBuf; + + while (ThisTokBuf != ThisTokEnd) { + // Is this a span of non-escape characters? + if (ThisTokBuf[0] != '\\') { + const char *InStart = ThisTokBuf; + do { + ++ThisTokBuf; + } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\'); + + // Copy the character span over. + unsigned Len = ThisTokBuf-InStart; + if (!AnyWide) { + memcpy(ResultPtr, InStart, Len); + ResultPtr += Len; + } else { + // Note: our internal rep of wide char tokens is always little-endian. 
+ for (; Len; --Len, ++InStart) { + *ResultPtr++ = InStart[0]; + // Add zeros at the end. + for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i) + *ResultPtr++ = 0; + } + } + continue; + } + + // Otherwise, this is an escape character. Process it. + unsigned ResultChar = ProcessCharEscape(ThisTokBuf, ThisTokEnd, hadError, + StringToks[i].getLocation(), + ThisIsWide, PP); + + // Note: our internal rep of wide char tokens is always little-endian. + *ResultPtr++ = ResultChar & 0xFF; + + if (AnyWide) { + for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i) + *ResultPtr++ = ResultChar >> i*8; + } + } + } + + // Add zero terminator. + *ResultPtr = 0; + if (AnyWide) { + for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i) + *ResultPtr++ = 0; + } +} diff --git a/Lex/MacroExpander.cpp b/Lex/MacroExpander.cpp new file mode 100644 index 0000000000..a45efbd86e --- /dev/null +++ b/Lex/MacroExpander.cpp @@ -0,0 +1,636 @@ +//===--- MacroExpander.cpp - Lex from a macro expansion -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the MacroExpander interface. +// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/MacroExpander.h" +#include "clang/Lex/MacroInfo.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/Diagnostic.h" +#include "llvm/ADT/SmallVector.h" +using namespace clang; + +//===----------------------------------------------------------------------===// +// MacroArgs Implementation +//===----------------------------------------------------------------------===// + +/// MacroArgs ctor function - This destroys the vector passed in. 
+MacroArgs *MacroArgs::create(const MacroInfo *MI, + const LexerToken *UnexpArgTokens, + unsigned NumToks, bool VarargsElided) { + assert(MI->isFunctionLike() && + "Can't have args for an object-like macro!"); + + // Allocate memory for the MacroArgs object with the lexer tokens at the end. + MacroArgs *Result = (MacroArgs*)malloc(sizeof(MacroArgs) + + NumToks*sizeof(LexerToken)); + // Construct the macroargs object. + new (Result) MacroArgs(NumToks, VarargsElided); + + // Copy the actual unexpanded tokens to immediately after the result ptr. + if (NumToks) + memcpy(const_cast<LexerToken*>(Result->getUnexpArgument(0)), + UnexpArgTokens, NumToks*sizeof(LexerToken)); + + return Result; +} + +/// destroy - Destroy and deallocate the memory for this object. +/// +void MacroArgs::destroy() { + // Run the dtor to deallocate the vectors. + this->~MacroArgs(); + // Release the memory for the object. + free(this); +} + + +/// getArgLength - Given a pointer to an expanded or unexpanded argument, +/// return the number of tokens, not counting the EOF, that make up the +/// argument. +unsigned MacroArgs::getArgLength(const LexerToken *ArgPtr) { + unsigned NumArgTokens = 0; + for (; ArgPtr->getKind() != tok::eof; ++ArgPtr) + ++NumArgTokens; + return NumArgTokens; +} + + +/// getUnexpArgument - Return the unexpanded tokens for the specified formal. +/// +const LexerToken *MacroArgs::getUnexpArgument(unsigned Arg) const { + // The unexpanded argument tokens start immediately after the MacroArgs object + // in memory. + const LexerToken *Start = (const LexerToken *)(this+1); + const LexerToken *Result = Start; + // Scan to find Arg. + for (; Arg; ++Result) { + assert(Result < Start+NumUnexpArgTokens && "Invalid arg #"); + if (Result->getKind() == tok::eof) + --Arg; + } + return Result; +} + + +/// ArgNeedsPreexpansion - If we can prove that the argument won't be affected +/// by pre-expansion, return false. Otherwise, conservatively return true. 
+bool MacroArgs::ArgNeedsPreexpansion(const LexerToken *ArgTok) const { + // If there are no identifiers in the argument list, or if the identifiers are + // known to not be macros, pre-expansion won't modify it. + for (; ArgTok->getKind() != tok::eof; ++ArgTok) + if (IdentifierInfo *II = ArgTok->getIdentifierInfo()) { + if (II->getMacroInfo() && II->getMacroInfo()->isEnabled()) + // Return true even though the macro could be a function-like macro + // without a following '(' token. + return true; + } + return false; +} + +/// getPreExpArgument - Return the pre-expanded form of the specified +/// argument. +const std::vector<LexerToken> & +MacroArgs::getPreExpArgument(unsigned Arg, Preprocessor &PP) { + assert(Arg < NumUnexpArgTokens && "Invalid argument number!"); + + // If we have already computed this, return it. + if (PreExpArgTokens.empty()) + PreExpArgTokens.resize(NumUnexpArgTokens); + + std::vector<LexerToken> &Result = PreExpArgTokens[Arg]; + if (!Result.empty()) return Result; + + const LexerToken *AT = getUnexpArgument(Arg); + unsigned NumToks = getArgLength(AT)+1; // Include the EOF. + + // Otherwise, we have to pre-expand this argument, populating Result. To do + // this, we set up a fake MacroExpander to lex from the unexpanded argument + // list. With this installed, we lex expanded tokens until we hit the EOF + // token at the end of the unexp list. + PP.EnterTokenStream(AT, NumToks); + + // Lex all of the macro-expanded tokens into Result. + do { + Result.push_back(LexerToken()); + PP.Lex(Result.back()); + } while (Result.back().getKind() != tok::eof); + + // Pop the token stream off the top of the stack. We know that the internal + // pointer inside of it is to the "end" of the token stream, but the stack + // will not otherwise be popped until the next token is lexed. The problem is + // that the token may be lexed sometime after the vector of tokens itself is + // destroyed, which would be badness. 
+ PP.RemoveTopOfLexerStack(); + return Result; +} + + +/// StringifyArgument - Implement C99 6.10.3.2p2, converting a sequence of +/// tokens into the literal string token that should be produced by the C # +/// preprocessor operator. +/// +static LexerToken StringifyArgument(const LexerToken *ArgToks, + Preprocessor &PP, bool Charify = false) { + LexerToken Tok; + Tok.startToken(); + Tok.setKind(tok::string_literal); + + const LexerToken *ArgTokStart = ArgToks; + + // Stringify all the tokens. + std::string Result = "\""; + // FIXME: Optimize this loop to not use std::strings. + bool isFirst = true; + for (; ArgToks->getKind() != tok::eof; ++ArgToks) { + const LexerToken &Tok = *ArgToks; + if (!isFirst && Tok.hasLeadingSpace()) + Result += ' '; + isFirst = false; + + // If this is a string or character constant, escape the token as specified + // by 6.10.3.2p2. + if (Tok.getKind() == tok::string_literal || // "foo" + Tok.getKind() == tok::wide_string_literal || // L"foo" + Tok.getKind() == tok::char_constant) { // 'x' and L'x'. + Result += Lexer::Stringify(PP.getSpelling(Tok)); + } else { + // Otherwise, just append the token. + Result += PP.getSpelling(Tok); + } + } + + // If the last character of the string is a \, and if it isn't escaped, this + // is an invalid string literal, diagnose it as specified in C99. + if (Result[Result.size()-1] == '\\') { + // Count the number of consequtive \ characters. If even, then they are + // just escaped backslashes, otherwise it's an error. + unsigned FirstNonSlash = Result.size()-2; + // Guaranteed to find the starting " if nothing else. + while (Result[FirstNonSlash] == '\\') + --FirstNonSlash; + if ((Result.size()-1-FirstNonSlash) & 1) { + // Diagnose errors for things like: #define F(X) #X / F(\) + PP.Diag(ArgToks[-1], diag::pp_invalid_string_literal); + Result.erase(Result.end()-1); // remove one of the \'s. 
+ } + } + Result += '"'; + + // If this is the charify operation and the result is not a legal character + // constant, diagnose it. + if (Charify) { + // First step, turn double quotes into single quotes: + Result[0] = '\''; + Result[Result.size()-1] = '\''; + + // Check for bogus character. + bool isBad = false; + if (Result.size() == 3) { + isBad = Result[1] == '\''; // ''' is not legal. '\' already fixed above. + } else { + isBad = (Result.size() != 4 || Result[1] != '\\'); // Not '\x' + } + + if (isBad) { + PP.Diag(ArgTokStart[0], diag::err_invalid_character_to_charify); + Result = "' '"; // Use something arbitrary, but legal. + } + } + + Tok.setLength(Result.size()); + Tok.setLocation(PP.CreateString(&Result[0], Result.size())); + return Tok; +} + +/// getStringifiedArgument - Compute, cache, and return the specified argument +/// that has been 'stringified' as required by the # operator. +const LexerToken &MacroArgs::getStringifiedArgument(unsigned ArgNo, + Preprocessor &PP) { + assert(ArgNo < NumUnexpArgTokens && "Invalid argument number!"); + if (StringifiedArgs.empty()) { + StringifiedArgs.resize(getNumArguments()); + memset(&StringifiedArgs[0], 0, + sizeof(StringifiedArgs[0])*getNumArguments()); + } + if (StringifiedArgs[ArgNo].getKind() != tok::string_literal) + StringifiedArgs[ArgNo] = StringifyArgument(getUnexpArgument(ArgNo), PP); + return StringifiedArgs[ArgNo]; +} + +//===----------------------------------------------------------------------===// +// MacroExpander Implementation +//===----------------------------------------------------------------------===// + +/// Create a macro expander for the specified macro with the specified actual +/// arguments. Note that this ctor takes ownership of the ActualArgs pointer. 
+MacroExpander::MacroExpander(LexerToken &Tok, MacroArgs *Actuals, + Preprocessor &pp) + : Macro(Tok.getIdentifierInfo()->getMacroInfo()), + ActualArgs(Actuals), PP(pp), CurToken(0), + InstantiateLoc(Tok.getLocation()), + AtStartOfLine(Tok.isAtStartOfLine()), + HasLeadingSpace(Tok.hasLeadingSpace()) { + MacroTokens = &Macro->getReplacementTokens()[0]; + NumMacroTokens = Macro->getReplacementTokens().size(); + + // If this is a function-like macro, expand the arguments and change + // MacroTokens to point to the expanded tokens. + if (Macro->isFunctionLike() && Macro->getNumArgs()) + ExpandFunctionArguments(); + + // Mark the macro as currently disabled, so that it is not recursively + // expanded. The macro must be disabled only after argument pre-expansion of + // function-like macro arguments occurs. + Macro->DisableMacro(); +} + +/// Create a macro expander for the specified token stream. This does not +/// take ownership of the specified token vector. +MacroExpander::MacroExpander(const LexerToken *TokArray, unsigned NumToks, + Preprocessor &pp) + : Macro(0), ActualArgs(0), PP(pp), MacroTokens(TokArray), + NumMacroTokens(NumToks), CurToken(0), + InstantiateLoc(SourceLocation()), AtStartOfLine(false), + HasLeadingSpace(false) { + + // Set HasLeadingSpace/AtStartOfLine so that the first token will be + // returned unmodified. + if (NumToks != 0) { + AtStartOfLine = TokArray[0].isAtStartOfLine(); + HasLeadingSpace = TokArray[0].hasLeadingSpace(); + } +} + + +MacroExpander::~MacroExpander() { + // If this was a function-like macro that actually uses its arguments, delete + // the expanded tokens. + if (Macro && MacroTokens != &Macro->getReplacementTokens()[0]) + delete [] MacroTokens; + + // MacroExpander owns its formal arguments. + if (ActualArgs) ActualArgs->destroy(); +} + +/// Expand the arguments of a function-like macro so that we can quickly +/// return preexpanded tokens from MacroTokens. 
+void MacroExpander::ExpandFunctionArguments() { + llvm::SmallVector<LexerToken, 128> ResultToks; + + // Loop through the MacroTokens tokens, expanding them into ResultToks. Keep + // track of whether we change anything. If not, no need to keep them. If so, + // we install the newly expanded sequence as MacroTokens. + bool MadeChange = false; + + // NextTokGetsSpace - When this is true, the next token appended to the + // output list will get a leading space, regardless of whether it had one to + // begin with or not. This is used for placemarker support. + bool NextTokGetsSpace = false; + + for (unsigned i = 0, e = NumMacroTokens; i != e; ++i) { + // If we found the stringify operator, get the argument stringified. The + // preprocessor already verified that the following token is a macro name + // when the #define was parsed. + const LexerToken &CurTok = MacroTokens[i]; + if (CurTok.getKind() == tok::hash || CurTok.getKind() == tok::hashat) { + int ArgNo = Macro->getArgumentNum(MacroTokens[i+1].getIdentifierInfo()); + assert(ArgNo != -1 && "Token following # is not an argument?"); + + LexerToken Res; + if (CurTok.getKind() == tok::hash) // Stringify + Res = ActualArgs->getStringifiedArgument(ArgNo, PP); + else { + // 'charify': don't bother caching these. + Res = StringifyArgument(ActualArgs->getUnexpArgument(ArgNo), PP, true); + } + + // The stringified/charified string leading space flag gets set to match + // the #/#@ operator. + if (CurTok.hasLeadingSpace() || NextTokGetsSpace) + Res.setFlag(LexerToken::LeadingSpace); + + ResultToks.push_back(Res); + MadeChange = true; + ++i; // Skip arg name. + NextTokGetsSpace = false; + continue; + } + + // Otherwise, if this is not an argument token, just add the token to the + // output buffer. + IdentifierInfo *II = CurTok.getIdentifierInfo(); + int ArgNo = II ? Macro->getArgumentNum(II) : -1; + if (ArgNo == -1) { + // This isn't an argument, just add it. 
+ ResultToks.push_back(CurTok); + + if (NextTokGetsSpace) { + ResultToks.back().setFlag(LexerToken::LeadingSpace); + NextTokGetsSpace = false; + } + continue; + } + + // An argument is expanded somehow, the result is different than the + // input. + MadeChange = true; + + // Otherwise, this is a use of the argument. Find out if there is a paste + // (##) operator before or after the argument. + bool PasteBefore = + !ResultToks.empty() && ResultToks.back().getKind() == tok::hashhash; + bool PasteAfter = i+1 != e && MacroTokens[i+1].getKind() == tok::hashhash; + + // If it is not the LHS/RHS of a ## operator, we must pre-expand the + // argument and substitute the expanded tokens into the result. This is + // C99 6.10.3.1p1. + if (!PasteBefore && !PasteAfter) { + const LexerToken *ResultArgToks; + + // Only preexpand the argument if it could possibly need it. This + // avoids some work in common cases. + const LexerToken *ArgTok = ActualArgs->getUnexpArgument(ArgNo); + if (ActualArgs->ArgNeedsPreexpansion(ArgTok)) + ResultArgToks = &ActualArgs->getPreExpArgument(ArgNo, PP)[0]; + else + ResultArgToks = ArgTok; // Use non-preexpanded tokens. + + // If the arg token expanded into anything, append it. + if (ResultArgToks->getKind() != tok::eof) { + unsigned FirstResult = ResultToks.size(); + unsigned NumToks = MacroArgs::getArgLength(ResultArgToks); + ResultToks.append(ResultArgToks, ResultArgToks+NumToks); + + // If any tokens were substituted from the argument, the whitespace + // before the first token should match the whitespace of the arg + // identifier. + ResultToks[FirstResult].setFlagValue(LexerToken::LeadingSpace, + CurTok.hasLeadingSpace() || + NextTokGetsSpace); + NextTokGetsSpace = false; + } else { + // If this is an empty argument, and if there was whitespace before the + // formal token, make sure the next token gets whitespace before it. 
+ NextTokGetsSpace = CurTok.hasLeadingSpace(); + } + continue; + } + + // Okay, we have a token that is either the LHS or RHS of a paste (##) + // argument. It gets substituted as its non-pre-expanded tokens. + const LexerToken *ArgToks = ActualArgs->getUnexpArgument(ArgNo); + unsigned NumToks = MacroArgs::getArgLength(ArgToks); + if (NumToks) { // Not an empty argument? + ResultToks.append(ArgToks, ArgToks+NumToks); + + // If the next token was supposed to get leading whitespace, ensure it has + // it now. + if (NextTokGetsSpace) { + ResultToks[ResultToks.size()-NumToks].setFlag(LexerToken::LeadingSpace); + NextTokGetsSpace = false; + } + continue; + } + + // If an empty argument is on the LHS or RHS of a paste, the standard (C99 + // 6.10.3.3p2,3) calls for a bunch of placemarker stuff to occur. We + // implement this by eating ## operators when a LHS or RHS expands to + // empty. + NextTokGetsSpace |= CurTok.hasLeadingSpace(); + if (PasteAfter) { + // Discard the argument token and skip (don't copy to the expansion + // buffer) the paste operator after it. + NextTokGetsSpace |= MacroTokens[i+1].hasLeadingSpace(); + ++i; + continue; + } + + // If this is on the RHS of a paste operator, we've already copied the + // paste operator to the ResultToks list. Remove it. + assert(PasteBefore && ResultToks.back().getKind() == tok::hashhash); + NextTokGetsSpace |= ResultToks.back().hasLeadingSpace(); + ResultToks.pop_back(); + + // If this is the __VA_ARGS__ token, and if the argument wasn't provided, + // and if the macro had at least one real argument, and if the token before + // the ## was a comma, remove the comma. + if ((unsigned)ArgNo == Macro->getNumArgs()-1 && // is __VA_ARGS__ + ActualArgs->isVarargsElidedUse() && // Argument elided. + !ResultToks.empty() && ResultToks.back().getKind() == tok::comma) { + // Never add a space, even if the comma, ##, or arg had a space. 
+ NextTokGetsSpace = false; + ResultToks.pop_back(); + } + continue; + } + + // If anything changed, install this as the new MacroTokens list. + if (MadeChange) { + // This is deleted in the dtor. + NumMacroTokens = ResultToks.size(); + LexerToken *Res = new LexerToken[ResultToks.size()]; + if (NumMacroTokens) + memcpy(Res, &ResultToks[0], NumMacroTokens*sizeof(LexerToken)); + MacroTokens = Res; + } +} + +/// Lex - Lex and return a token from this macro stream. +/// +void MacroExpander::Lex(LexerToken &Tok) { + // Lexing off the end of the macro, pop this macro off the expansion stack. + if (isAtEnd()) { + // If this is a macro (not a token stream), mark the macro enabled now + // that it is no longer being expanded. + if (Macro) Macro->EnableMacro(); + + // Pop this context off the preprocessors lexer stack and get the next + // token. This will delete "this" so remember the PP instance var. + Preprocessor &PPCache = PP; + if (PP.HandleEndOfMacro(Tok)) + return; + + // HandleEndOfMacro may not return a token. If it doesn't, lex whatever is + // next. + return PPCache.Lex(Tok); + } + + // If this is the first token of the expanded result, we inherit spacing + // properties later. + bool isFirstToken = CurToken == 0; + + // Get the next token to return. + Tok = MacroTokens[CurToken++]; + + // If this token is followed by a token paste (##) operator, paste the tokens! + if (!isAtEnd() && MacroTokens[CurToken].getKind() == tok::hashhash) + PasteTokens(Tok); + + // The token's current location indicate where the token was lexed from. We + // need this information to compute the spelling of the token, but any + // diagnostics for the expanded token should appear as if they came from + // InstantiationLoc. Pull this information together into a new SourceLocation + // that captures all of this. + if (InstantiateLoc.isValid()) { // Don't do this for token streams. + SourceManager &SrcMgr = PP.getSourceManager(); + // The token could have come from a prior macro expansion. 
In that case, + // ignore the macro expand part to get to the physloc. This happens for + // stuff like: #define A(X) X A(A(X)) A(1) + SourceLocation PhysLoc = SrcMgr.getPhysicalLoc(Tok.getLocation()); + Tok.setLocation(SrcMgr.getInstantiationLoc(PhysLoc, InstantiateLoc)); + } + + // If this is the first token, set the lexical properties of the token to + // match the lexical properties of the macro identifier. + if (isFirstToken) { + Tok.setFlagValue(LexerToken::StartOfLine , AtStartOfLine); + Tok.setFlagValue(LexerToken::LeadingSpace, HasLeadingSpace); + } + + // Handle recursive expansion! + if (Tok.getIdentifierInfo()) + return PP.HandleIdentifier(Tok); + + // Otherwise, return a normal token. +} + +/// PasteTokens - Tok is the LHS of a ## operator, and CurToken is the ## +/// operator. Read the ## and RHS, and paste the LHS/RHS together. If there +/// are is another ## after it, chomp it iteratively. Return the result as Tok. +void MacroExpander::PasteTokens(LexerToken &Tok) { + llvm::SmallVector<char, 128> Buffer; + do { + // Consume the ## operator. + SourceLocation PasteOpLoc = MacroTokens[CurToken].getLocation(); + ++CurToken; + assert(!isAtEnd() && "No token on the RHS of a paste operator!"); + + // Get the RHS token. + const LexerToken &RHS = MacroTokens[CurToken]; + + bool isInvalid = false; + + // Allocate space for the result token. This is guaranteed to be enough for + // the two tokens and a null terminator. + Buffer.resize(Tok.getLength() + RHS.getLength() + 1); + + // Get the spelling of the LHS token in Buffer. + const char *BufPtr = &Buffer[0]; + unsigned LHSLen = PP.getSpelling(Tok, BufPtr); + if (BufPtr != &Buffer[0]) // Really, we want the chars in Buffer! + memcpy(&Buffer[0], BufPtr, LHSLen); + + BufPtr = &Buffer[LHSLen]; + unsigned RHSLen = PP.getSpelling(RHS, BufPtr); + if (BufPtr != &Buffer[LHSLen]) // Really, we want the chars in Buffer! + memcpy(&Buffer[LHSLen], BufPtr, RHSLen); + + // Add null terminator. 
+ Buffer[LHSLen+RHSLen] = '\0'; + + // Trim excess space. + Buffer.resize(LHSLen+RHSLen+1); + + // Plop the pasted result (including the trailing newline and null) into a + // scratch buffer where we can lex it. + SourceLocation ResultTokLoc = PP.CreateString(&Buffer[0], Buffer.size()); + + // Lex the resultant pasted token into Result. + LexerToken Result; + + // Avoid testing /*, as the lexer would think it is the start of a comment + // and emit an error that it is unterminated. + if (Tok.getKind() == tok::slash && RHS.getKind() == tok::star) { + isInvalid = true; + } else if (Tok.getKind() == tok::identifier && + RHS.getKind() == tok::identifier) { + // Common paste case: identifier+identifier = identifier. Avoid creating + // a lexer and other overhead. + PP.IncrementPasteCounter(true); + Result.startToken(); + Result.setKind(tok::identifier); + Result.setLocation(ResultTokLoc); + Result.setLength(LHSLen+RHSLen); + } else { + PP.IncrementPasteCounter(false); + + // Make a lexer to lex this string from. + SourceManager &SourceMgr = PP.getSourceManager(); + const char *ResultStrData = SourceMgr.getCharacterData(ResultTokLoc); + + unsigned FileID = ResultTokLoc.getFileID(); + assert(FileID && "Could not get FileID for paste?"); + + // Make a lexer object so that we lex and expand the paste result. + Lexer *TL = new Lexer(SourceMgr.getBuffer(FileID), FileID, PP, + ResultStrData, + ResultStrData+LHSLen+RHSLen /*don't include null*/); + + // Lex a token in raw mode. This way it won't look up identifiers + // automatically, lexing off the end will return an eof token, and + // warnings are disabled. This returns true if the result token is the + // entire buffer. + bool IsComplete = TL->LexRawToken(Result); + + // If we got an EOF token, we didn't form even ONE token. For example, we + // did "/ ## /" to get "//". + IsComplete &= Result.getKind() != tok::eof; + isInvalid = !IsComplete; + + // We're now done with the temporary lexer. 
+ delete TL; + } + + // If pasting the two tokens didn't form a full new token, this is an error. + // This occurs with "x ## +" and other stuff. Return with Tok unmodified + // and with RHS as the next token to lex. + if (isInvalid) { + // If not in assembler language mode. + PP.Diag(PasteOpLoc, diag::err_pp_bad_paste, + std::string(Buffer.begin(), Buffer.end()-1)); + return; + } + + // Turn ## into 'other' to avoid # ## # from looking like a paste operator. + if (Result.getKind() == tok::hashhash) + Result.setKind(tok::unknown); + // FIXME: Turn __VARRGS__ into "not a token"? + + // Transfer properties of the LHS over the the Result. + Result.setFlagValue(LexerToken::StartOfLine , Tok.isAtStartOfLine()); + Result.setFlagValue(LexerToken::LeadingSpace, Tok.hasLeadingSpace()); + + // Finally, replace LHS with the result, consume the RHS, and iterate. + ++CurToken; + Tok = Result; + } while (!isAtEnd() && MacroTokens[CurToken].getKind() == tok::hashhash); + + // Now that we got the result token, it will be subject to expansion. Since + // token pasting re-lexes the result token in raw mode, identifier information + // isn't looked up. As such, if the result is an identifier, look up id info. + if (Tok.getKind() == tok::identifier) { + // Look up the identifier info for the token. We disabled identifier lookup + // by saying we're skipping contents, so we need to do this manually. + Tok.setIdentifierInfo(PP.LookUpIdentifierInfo(Tok)); + } +} + +/// isNextTokenLParen - If the next token lexed will pop this macro off the +/// expansion stack, return 2. If the next unexpanded token is a '(', return +/// 1, otherwise return 0. +unsigned MacroExpander::isNextTokenLParen() const { + // Out of tokens? 
+ if (isAtEnd()) + return 2; + return MacroTokens[CurToken].getKind() == tok::l_paren; +} diff --git a/Lex/MacroInfo.cpp b/Lex/MacroInfo.cpp new file mode 100644 index 0000000000..e6dae42769 --- /dev/null +++ b/Lex/MacroInfo.cpp @@ -0,0 +1,70 @@ +//===--- MacroInfo.cpp - Information about #defined identifiers -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the MacroInfo interface. +// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/MacroInfo.h" +#include "clang/Lex/Preprocessor.h" +using namespace clang; + +MacroInfo::MacroInfo(SourceLocation DefLoc) : Location(DefLoc) { + IsFunctionLike = false; + IsC99Varargs = false; + IsGNUVarargs = false; + IsBuiltinMacro = false; + IsTargetSpecific = false; + IsDisabled = false; + IsUsed = true; +} + +/// isIdenticalTo - Return true if the specified macro definition is equal to +/// this macro in spelling, arguments, and whitespace. This is used to emit +/// duplicate definition warnings. This implements the rules in C99 6.10.3. +/// +/// Note that this intentionally does not check isTargetSpecific for matching. +/// +bool MacroInfo::isIdenticalTo(const MacroInfo &Other, Preprocessor &PP) const { + // Check # tokens in replacement, number of args, and various flags all match. + if (ReplacementTokens.size() != Other.ReplacementTokens.size() || + Arguments.size() != Other.Arguments.size() || + isFunctionLike() != Other.isFunctionLike() || + isC99Varargs() != Other.isC99Varargs() || + isGNUVarargs() != Other.isGNUVarargs()) + return false; + + // Check arguments. 
+ for (arg_iterator I = arg_begin(), OI = Other.arg_begin(), E = arg_end(); + I != E; ++I, ++OI) + if (*I != *OI) return false; + + // Check all the tokens. + for (unsigned i = 0, e = ReplacementTokens.size(); i != e; ++i) { + const LexerToken &A = ReplacementTokens[i]; + const LexerToken &B = Other.ReplacementTokens[i]; + if (A.getKind() != B.getKind() || + A.isAtStartOfLine() != B.isAtStartOfLine() || + A.hasLeadingSpace() != B.hasLeadingSpace()) + return false; + + // If this is an identifier, it is easy. + if (A.getIdentifierInfo() || B.getIdentifierInfo()) { + if (A.getIdentifierInfo() != B.getIdentifierInfo()) + return false; + continue; + } + + // Otherwise, check the spelling. + if (PP.getSpelling(A) != PP.getSpelling(B)) + return false; + } + + return true; +} diff --git a/Lex/Makefile b/Lex/Makefile new file mode 100644 index 0000000000..f56aed0589 --- /dev/null +++ b/Lex/Makefile @@ -0,0 +1,28 @@ +##===- clang/Lex/Makefile ----------------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file was developed by Chris Lattner and is distributed under +# the University of Illinois Open Source License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +# +# This implements the Lexer library for the C-Language front-end. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. 
+include $(LEVEL)/Makefile.config + +LIBRARYNAME := clangLex +BUILD_ARCHIVE = 1 +CXXFLAGS = -fno-rtti + +ifeq ($(ARCH),PowerPC) +CXXFLAGS += -maltivec +endif + +CPPFLAGS += -I$(PROJ_SRC_DIR)/../include + +include $(LEVEL)/Makefile.common + diff --git a/Lex/PPExpressions.cpp b/Lex/PPExpressions.cpp new file mode 100644 index 0000000000..b3457e7927 --- /dev/null +++ b/Lex/PPExpressions.cpp @@ -0,0 +1,654 @@ +//===--- PPExpressions.cpp - Preprocessor Expression Evaluation -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Preprocessor::EvaluateDirectiveExpression method, +// which parses and evaluates integer constant expressions for #if directives. +// +//===----------------------------------------------------------------------===// +// +// FIXME: implement testing for #assert's. +// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/Preprocessor.h" +#include "clang/Lex/MacroInfo.h" +#include "clang/Lex/LiteralSupport.h" +#include "clang/Basic/TargetInfo.h" +#include "clang/Basic/TokenKinds.h" +#include "clang/Basic/Diagnostic.h" +#include "llvm/ADT/APSInt.h" +#include "llvm/ADT/SmallString.h" +using namespace clang; + +static bool EvaluateDirectiveSubExpr(llvm::APSInt &LHS, unsigned MinPrec, + LexerToken &PeekTok, bool ValueLive, + Preprocessor &PP); + +/// DefinedTracker - This struct is used while parsing expressions to keep track +/// of whether !defined(X) has been seen. +/// +/// With this simple scheme, we handle the basic forms: +/// !defined(X) and !defined X +/// but we also trivially handle (silly) stuff like: +/// !!!defined(X) and +!defined(X) and !+!+!defined(X) and !(defined(X)). 
+struct DefinedTracker { + /// Each time a Value is evaluated, it returns information about whether the + /// parsed value is of the form defined(X), !defined(X) or is something else. + enum TrackerState { + DefinedMacro, // defined(X) + NotDefinedMacro, // !defined(X) + Unknown // Something else. + } State; + /// TheMacro - When the state is DefinedMacro or NotDefinedMacro, this + /// indicates the macro that was checked. + IdentifierInfo *TheMacro; +}; + + + +/// EvaluateValue - Evaluate the token PeekTok (and any others needed) and +/// return the computed value in Result. Return true if there was an error +/// parsing. This function also returns information about the form of the +/// expression in DT. See above for information on what DT means. +/// +/// If ValueLive is false, then this value is being evaluated in a context where +/// the result is not used. As such, avoid diagnostics that relate to +/// evaluation. +static bool EvaluateValue(llvm::APSInt &Result, LexerToken &PeekTok, + DefinedTracker &DT, bool ValueLive, + Preprocessor &PP) { + Result = 0; + DT.State = DefinedTracker::Unknown; + + // If this token's spelling is a pp-identifier, check to see if it is + // 'defined' or if it is a macro. Note that we check here because many + // keywords are pp-identifiers, so we can't check the kind. + if (IdentifierInfo *II = PeekTok.getIdentifierInfo()) { + // If this identifier isn't 'defined' and it wasn't macro expanded, it turns + // into a simple 0, unless it is the C++ keyword "true", in which case it + // turns into "1". + if (II->getPPKeywordID() != tok::pp_defined) { + Result = II->getTokenID() == tok::kw_true; + Result.setIsUnsigned(false); // "0" is signed intmax_t 0. + PP.LexNonComment(PeekTok); + return false; + } + + // Handle "defined X" and "defined(X)". + + // Get the next token, don't expand it. + PP.LexUnexpandedToken(PeekTok); + + // Two options, it can either be a pp-identifier or a (. 
+ bool InParens = false; + if (PeekTok.getKind() == tok::l_paren) { + // Found a paren, remember we saw it and skip it. + InParens = true; + PP.LexUnexpandedToken(PeekTok); + } + + // If we don't have a pp-identifier now, this is an error. + if ((II = PeekTok.getIdentifierInfo()) == 0) { + PP.Diag(PeekTok, diag::err_pp_defined_requires_identifier); + return true; + } + + // Otherwise, we got an identifier, is it defined to something? + Result = II->getMacroInfo() != 0; + Result.setIsUnsigned(false); // Result is signed intmax_t. + + // If there is a macro, mark it used. + if (Result != 0 && ValueLive) { + II->getMacroInfo()->setIsUsed(true); + + // If this is the first use of a target-specific macro, warn about it. + if (II->getMacroInfo()->isTargetSpecific()) { + // Don't warn on second use. + II->getMacroInfo()->setIsTargetSpecific(false); + PP.getTargetInfo().DiagnoseNonPortability(PeekTok.getLocation(), + diag::port_target_macro_use); + } + } else if (ValueLive) { + // Use of a target-specific macro for some other target? If so, warn. + if (II->isOtherTargetMacro()) { + II->setIsOtherTargetMacro(false); // Don't warn on second use. + PP.getTargetInfo().DiagnoseNonPortability(PeekTok.getLocation(), + diag::port_target_macro_use); + } + } + + // Consume identifier. + PP.LexNonComment(PeekTok); + + // If we are in parens, ensure we have a trailing ). + if (InParens) { + if (PeekTok.getKind() != tok::r_paren) { + PP.Diag(PeekTok, diag::err_pp_missing_rparen); + return true; + } + // Consume the ). + PP.LexNonComment(PeekTok); + } + + // Success, remember that we saw defined(X). + DT.State = DefinedTracker::DefinedMacro; + DT.TheMacro = II; + return false; + } + + switch (PeekTok.getKind()) { + default: // Non-value token. + PP.Diag(PeekTok, diag::err_pp_expr_bad_token); + return true; + case tok::eom: + case tok::r_paren: + // If there is no expression, report and exit. 
+ PP.Diag(PeekTok, diag::err_pp_expected_value_in_expr); + return true; + case tok::numeric_constant: { + llvm::SmallString<64> IntegerBuffer; + IntegerBuffer.resize(PeekTok.getLength()); + const char *ThisTokBegin = &IntegerBuffer[0]; + unsigned ActualLength = PP.getSpelling(PeekTok, ThisTokBegin); + NumericLiteralParser Literal(ThisTokBegin, ThisTokBegin+ActualLength, + PeekTok.getLocation(), PP); + if (Literal.hadError) + return true; // a diagnostic was already reported. + + if (Literal.isFloatingLiteral()) { + PP.Diag(PeekTok, diag::err_pp_illegal_floating_literal); + return true; + } + assert(Literal.isIntegerLiteral() && "Unknown ppnumber"); + + // Parse the integer literal into Result. + if (Literal.GetIntegerValue(Result)) { + // Overflow parsing integer literal. + if (ValueLive) PP.Diag(PeekTok, diag::warn_integer_too_large); + Result.setIsUnsigned(true); + } else { + // Set the signedness of the result to match whether there was a U suffix + // or not. + Result.setIsUnsigned(Literal.isUnsigned); + + // Detect overflow based on whether the value is signed. If signed + // and if the value is too large, emit a warning "integer constant is so + // large that it is unsigned" e.g. on 12345678901234567890 where intmax_t + // is 64-bits. + if (!Literal.isUnsigned && Result.isNegative()) { + if (ValueLive)PP.Diag(PeekTok, diag::warn_integer_too_large_for_signed); + Result.setIsUnsigned(true); + } + } + + // Consume the token. + PP.LexNonComment(PeekTok); + return false; + } + case tok::char_constant: { // 'x' + llvm::SmallString<32> CharBuffer; + CharBuffer.resize(PeekTok.getLength()); + const char *ThisTokBegin = &CharBuffer[0]; + unsigned ActualLength = PP.getSpelling(PeekTok, ThisTokBegin); + CharLiteralParser Literal(ThisTokBegin, ThisTokBegin+ActualLength, + PeekTok.getLocation(), PP); + if (Literal.hadError()) + return true; // A diagnostic was already emitted. + + // Character literals are always int or wchar_t, expand to intmax_t. 
+ TargetInfo &TI = PP.getTargetInfo(); + unsigned NumBits; + if (Literal.isWide()) + NumBits = TI.getWCharWidth(PeekTok.getLocation()); + else + NumBits = TI.getCharWidth(PeekTok.getLocation()); + + // Set the width. + llvm::APSInt Val(NumBits); + // Set the value. + Val = Literal.getValue(); + // Set the signedness. + Val.setIsUnsigned(!TI.isCharSigned(PeekTok.getLocation())); + + if (Result.getBitWidth() > Val.getBitWidth()) { + if (Val.isSigned()) + Result = Val.sext(Result.getBitWidth()); + else + Result = Val.zext(Result.getBitWidth()); + Result.setIsUnsigned(Val.isUnsigned()); + } else { + assert(Result.getBitWidth() == Val.getBitWidth() && + "intmax_t smaller than char/wchar_t?"); + Result = Val; + } + + // Consume the token. + PP.LexNonComment(PeekTok); + return false; + } + case tok::l_paren: + PP.LexNonComment(PeekTok); // Eat the (. + // Parse the value and if there are any binary operators involved, parse + // them. + if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true; + + // If this is a silly value like (X), which doesn't need parens, check for + // !(defined X). + if (PeekTok.getKind() == tok::r_paren) { + // Just use DT unmodified as our result. + } else { + if (EvaluateDirectiveSubExpr(Result, 1, PeekTok, ValueLive, PP)) + return true; + + if (PeekTok.getKind() != tok::r_paren) { + PP.Diag(PeekTok, diag::err_pp_expected_rparen); + return true; + } + DT.State = DefinedTracker::Unknown; + } + PP.LexNonComment(PeekTok); // Eat the ). + return false; + + case tok::plus: + // Unary plus doesn't modify the value. + PP.LexNonComment(PeekTok); + return EvaluateValue(Result, PeekTok, DT, ValueLive, PP); + case tok::minus: { + SourceLocation Loc = PeekTok.getLocation(); + PP.LexNonComment(PeekTok); + if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true; + // C99 6.5.3.3p3: The sign of the result matches the sign of the operand. 
+ Result = -Result; + + bool Overflow = false; + if (Result.isUnsigned()) + Overflow = !Result.isPositive(); + else if (Result.isMinSignedValue()) + Overflow = true; // -MININT is the only thing that overflows. + + // If this operator is live and overflowed, report the issue. + if (Overflow && ValueLive) + PP.Diag(Loc, diag::warn_pp_expr_overflow); + + DT.State = DefinedTracker::Unknown; + return false; + } + + case tok::tilde: + PP.LexNonComment(PeekTok); + if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true; + // C99 6.5.3.3p4: The sign of the result matches the sign of the operand. + Result = ~Result; + DT.State = DefinedTracker::Unknown; + return false; + + case tok::exclaim: + PP.LexNonComment(PeekTok); + if (EvaluateValue(Result, PeekTok, DT, ValueLive, PP)) return true; + Result = !Result; + // C99 6.5.3.3p5: The sign of the result is 'int', aka it is signed. + Result.setIsUnsigned(false); + + if (DT.State == DefinedTracker::DefinedMacro) + DT.State = DefinedTracker::NotDefinedMacro; + else if (DT.State == DefinedTracker::NotDefinedMacro) + DT.State = DefinedTracker::DefinedMacro; + return false; + + // FIXME: Handle #assert + } +} + + + +/// getPrecedence - Return the precedence of the specified binary operator +/// token. This returns: +/// ~0 - Invalid token. +/// 14 - *,/,% +/// 13 - -,+ +/// 12 - <<,>> +/// 11 - >=, <=, >, < +/// 10 - ==, != +/// 9 - & +/// 8 - ^ +/// 7 - | +/// 6 - && +/// 5 - || +/// 4 - ? 
+/// 3 - : +/// 0 - eom, ) +static unsigned getPrecedence(tok::TokenKind Kind) { + switch (Kind) { + default: return ~0U; + case tok::percent: + case tok::slash: + case tok::star: return 14; + case tok::plus: + case tok::minus: return 13; + case tok::lessless: + case tok::greatergreater: return 12; + case tok::lessequal: + case tok::less: + case tok::greaterequal: + case tok::greater: return 11; + case tok::exclaimequal: + case tok::equalequal: return 10; + case tok::amp: return 9; + case tok::caret: return 8; + case tok::pipe: return 7; + case tok::ampamp: return 6; + case tok::pipepipe: return 5; + case tok::question: return 4; + case tok::colon: return 3; + case tok::comma: return 2; + case tok::r_paren: return 0; // Lowest priority, end of expr. + case tok::eom: return 0; // Lowest priority, end of macro. + } +} + + +/// EvaluateDirectiveSubExpr - Evaluate the subexpression whose first token is +/// PeekTok, and whose precedence is PeekPrec. +/// +/// If ValueLive is false, then this value is being evaluated in a context where +/// the result is not used. As such, avoid diagnostics that relate to +/// evaluation. +static bool EvaluateDirectiveSubExpr(llvm::APSInt &LHS, unsigned MinPrec, + LexerToken &PeekTok, bool ValueLive, + Preprocessor &PP) { + unsigned PeekPrec = getPrecedence(PeekTok.getKind()); + // If this token isn't valid, report the error. + if (PeekPrec == ~0U) { + PP.Diag(PeekTok, diag::err_pp_expr_bad_token); + return true; + } + + while (1) { + // If this token has a lower precedence than we are allowed to parse, return + // it so that higher levels of the recursion can parse it. + if (PeekPrec < MinPrec) + return false; + + tok::TokenKind Operator = PeekTok.getKind(); + + // If this is a short-circuiting operator, see if the RHS of the operator is + // dead. Note that this cannot just clobber ValueLive. Consider + // "0 && 1 ? 4 : 1 / 0", which is parsed as "(0 && 1) ? 4 : (1 / 0)". 
In + // this example, the RHS of the && being dead does not make the rest of the + // expr dead. + bool RHSIsLive; + if (Operator == tok::ampamp && LHS == 0) + RHSIsLive = false; // RHS of "0 && x" is dead. + else if (Operator == tok::pipepipe && LHS != 0) + RHSIsLive = false; // RHS of "1 || x" is dead. + else if (Operator == tok::question && LHS == 0) + RHSIsLive = false; // RHS (x) of "0 ? x : y" is dead. + else + RHSIsLive = ValueLive; + + // Consume the operator, saving the operator token for error reporting. + LexerToken OpToken = PeekTok; + PP.LexNonComment(PeekTok); + + llvm::APSInt RHS(LHS.getBitWidth()); + // Parse the RHS of the operator. + DefinedTracker DT; + if (EvaluateValue(RHS, PeekTok, DT, RHSIsLive, PP)) return true; + + // Remember the precedence of this operator and get the precedence of the + // operator immediately to the right of the RHS. + unsigned ThisPrec = PeekPrec; + PeekPrec = getPrecedence(PeekTok.getKind()); + + // If this token isn't valid, report the error. + if (PeekPrec == ~0U) { + PP.Diag(PeekTok, diag::err_pp_expr_bad_token); + return true; + } + + bool isRightAssoc = Operator == tok::question; + + // Get the precedence of the operator to the right of the RHS. If it binds + // more tightly with RHS than we do, evaluate it completely first. + if (ThisPrec < PeekPrec || + (ThisPrec == PeekPrec && isRightAssoc)) { + if (EvaluateDirectiveSubExpr(RHS, ThisPrec+1, PeekTok, RHSIsLive, PP)) + return true; + PeekPrec = getPrecedence(PeekTok.getKind()); + } + assert(PeekPrec <= ThisPrec && "Recursion didn't work!"); + + // Usual arithmetic conversions (C99 6.3.1.8p1): result is unsigned if + // either operand is unsigned. Don't do this for x and y in "x ? y : z". + llvm::APSInt Res(LHS.getBitWidth()); + if (Operator != tok::question) { + Res.setIsUnsigned(LHS.isUnsigned()|RHS.isUnsigned()); + // If this just promoted something from signed to unsigned, and if the + // value was negative, warn about it. 
+ if (ValueLive && Res.isUnsigned()) { + if (!LHS.isUnsigned() && LHS.isNegative()) + PP.Diag(OpToken, diag::warn_pp_convert_lhs_to_positive, + LHS.toString(10, true) + " to " + LHS.toString(10, false)); + if (!RHS.isUnsigned() && RHS.isNegative()) + PP.Diag(OpToken, diag::warn_pp_convert_rhs_to_positive, + RHS.toString(10, true) + " to " + RHS.toString(10, false)); + } + LHS.setIsUnsigned(Res.isUnsigned()); + RHS.setIsUnsigned(Res.isUnsigned()); + } + + // FIXME: All of these should detect and report overflow?? + bool Overflow = false; + switch (Operator) { + default: assert(0 && "Unknown operator token!"); + case tok::percent: + if (RHS == 0) { + if (ValueLive) PP.Diag(OpToken, diag::err_pp_remainder_by_zero); + return true; + } + Res = LHS % RHS; + break; + case tok::slash: + if (RHS == 0) { + if (ValueLive) PP.Diag(OpToken, diag::err_pp_division_by_zero); + return true; + } + Res = LHS / RHS; + if (LHS.isSigned()) + Overflow = LHS.isMinSignedValue() && RHS.isAllOnesValue(); // MININT/-1 + break; + case tok::star: + Res = LHS * RHS; + if (LHS != 0 && RHS != 0) + Overflow = Res/RHS != LHS || Res/LHS != RHS; + break; + case tok::lessless: { + // Determine whether overflow is about to happen. + unsigned ShAmt = RHS.getLimitedValue(); + if (ShAmt >= LHS.getBitWidth()) + Overflow = true, ShAmt = LHS.getBitWidth()-1; + else if (LHS.isUnsigned()) + Overflow = ShAmt > LHS.countLeadingZeros(); + else if (LHS.isPositive()) + Overflow = ShAmt >= LHS.countLeadingZeros(); // Don't allow sign change. + else + Overflow = ShAmt >= LHS.countLeadingOnes(); + + Res = LHS << ShAmt; + break; + } + case tok::greatergreater: { + // Determine whether overflow is about to happen. 
+ unsigned ShAmt = RHS.getLimitedValue(); + if (ShAmt >= LHS.getBitWidth()) + Overflow = true, ShAmt = LHS.getBitWidth()-1; + Res = LHS >> ShAmt; + break; + } + case tok::plus: + Res = LHS + RHS; + if (LHS.isUnsigned()) + Overflow = Res.ult(LHS); + else if (LHS.isPositive() == RHS.isPositive() && + Res.isPositive() != LHS.isPositive()) + Overflow = true; // Overflow for signed addition. + break; + case tok::minus: + Res = LHS - RHS; + if (LHS.isUnsigned()) + Overflow = Res.ugt(LHS); + else if (LHS.isPositive() != RHS.isPositive() && + Res.isPositive() != LHS.isPositive()) + Overflow = true; // Overflow for signed subtraction. + break; + case tok::lessequal: + Res = LHS <= RHS; + Res.setIsUnsigned(false); // C99 6.5.8p6, result is always int (signed) + break; + case tok::less: + Res = LHS < RHS; + Res.setIsUnsigned(false); // C99 6.5.8p6, result is always int (signed) + break; + case tok::greaterequal: + Res = LHS >= RHS; + Res.setIsUnsigned(false); // C99 6.5.8p6, result is always int (signed) + break; + case tok::greater: + Res = LHS > RHS; + Res.setIsUnsigned(false); // C99 6.5.8p6, result is always int (signed) + break; + case tok::exclaimequal: + Res = LHS != RHS; + Res.setIsUnsigned(false); // C99 6.5.9p3, result is always int (signed) + break; + case tok::equalequal: + Res = LHS == RHS; + Res.setIsUnsigned(false); // C99 6.5.9p3, result is always int (signed) + break; + case tok::amp: + Res = LHS & RHS; + break; + case tok::caret: + Res = LHS ^ RHS; + break; + case tok::pipe: + Res = LHS | RHS; + break; + case tok::ampamp: + Res = (LHS != 0 && RHS != 0); + Res.setIsUnsigned(false); // C99 6.5.13p3, result is always int (signed) + break; + case tok::pipepipe: + Res = (LHS != 0 || RHS != 0); + Res.setIsUnsigned(false); // C99 6.5.14p3, result is always int (signed) + break; + case tok::comma: + PP.Diag(OpToken, diag::ext_pp_comma_expr); + Res = RHS; // LHS = LHS,RHS -> RHS. + break; + case tok::question: { + // Parse the : part of the expression. 
+ if (PeekTok.getKind() != tok::colon) { + PP.Diag(OpToken, diag::err_pp_question_without_colon); + return true; + } + // Consume the :. + PP.LexNonComment(PeekTok); + + // Evaluate the value after the :. + bool AfterColonLive = ValueLive && LHS == 0; + llvm::APSInt AfterColonVal(LHS.getBitWidth()); + DefinedTracker DT; + if (EvaluateValue(AfterColonVal, PeekTok, DT, AfterColonLive, PP)) + return true; + + // Parse anything after the : RHS that has a higher precedence than ?. + if (EvaluateDirectiveSubExpr(AfterColonVal, ThisPrec+1, + PeekTok, AfterColonLive, PP)) + return true; + + // Now that we have the condition, the LHS and the RHS of the :, evaluate. + Res = LHS != 0 ? RHS : AfterColonVal; + + // Usual arithmetic conversions (C99 6.3.1.8p1): result is unsigned if + // either operand is unsigned. + Res.setIsUnsigned(RHS.isUnsigned() | AfterColonVal.isUnsigned()); + + // Figure out the precedence of the token after the : part. + PeekPrec = getPrecedence(PeekTok.getKind()); + break; + } + case tok::colon: + // Don't allow :'s to float around without being part of ?: exprs. + PP.Diag(OpToken, diag::err_pp_colon_without_question); + return true; + } + + // If this operator is live and overflowed, report the issue. + if (Overflow && ValueLive) + PP.Diag(OpToken, diag::warn_pp_expr_overflow); + + // Put the result back into 'LHS' for our next iteration. + LHS = Res; + } + + return false; +} + +/// EvaluateDirectiveExpression - Evaluate an integer constant expression that +/// may occur after a #if or #elif directive. If the expression is equivalent +/// to "!defined(X)" return X in IfNDefMacro. +bool Preprocessor:: +EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) { + // Peek ahead one token. + LexerToken Tok; + Lex(Tok); + + // C99 6.10.1p3 - All expressions are evaluated as intmax_t or uintmax_t. 
+ unsigned BitWidth = getTargetInfo().getIntMaxTWidth(Tok.getLocation()); + llvm::APSInt ResVal(BitWidth); + DefinedTracker DT; + if (EvaluateValue(ResVal, Tok, DT, true, *this)) { + // Parse error, skip the rest of the macro line. + if (Tok.getKind() != tok::eom) + DiscardUntilEndOfDirective(); + return false; + } + + // If we are at the end of the expression after just parsing a value, there + // must be no (unparenthesized) binary operators involved, so we can exit + // directly. + if (Tok.getKind() == tok::eom) { + // If the expression we parsed was of the form !defined(macro), return the + // macro in IfNDefMacro. + if (DT.State == DefinedTracker::NotDefinedMacro) + IfNDefMacro = DT.TheMacro; + + return ResVal != 0; + } + + // Otherwise, we must have a binary operator (e.g. "#if 1 < 2"), so parse the + // operator and the stuff after it. + if (EvaluateDirectiveSubExpr(ResVal, 1, Tok, true, *this)) { + // Parse error, skip the rest of the macro line. + if (Tok.getKind() != tok::eom) + DiscardUntilEndOfDirective(); + return false; + } + + // If we aren't at the tok::eom token, something bad happened, like an extra + // ')' token. + if (Tok.getKind() != tok::eom) { + Diag(Tok, diag::err_pp_expected_eol); + DiscardUntilEndOfDirective(); + } + + return ResVal != 0; +} + diff --git a/Lex/Pragma.cpp b/Lex/Pragma.cpp new file mode 100644 index 0000000000..de59934b4d --- /dev/null +++ b/Lex/Pragma.cpp @@ -0,0 +1,369 @@ +//===--- Pragma.cpp - Pragma registration and handling --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the PragmaHandler/PragmaTable interfaces and implements +// pragma related methods of the Preprocessor class. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/Pragma.h" +#include "clang/Lex/PPCallbacks.h" +#include "clang/Lex/HeaderSearch.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/FileManager.h" +#include "clang/Basic/SourceManager.h" +#include "llvm/ADT/SmallVector.h" +using namespace clang; + +// Out-of-line destructor to provide a home for the class. +PragmaHandler::~PragmaHandler() { +} + +//===----------------------------------------------------------------------===// +// PragmaNamespace Implementation. +//===----------------------------------------------------------------------===// + + +PragmaNamespace::~PragmaNamespace() { + for (unsigned i = 0, e = Handlers.size(); i != e; ++i) + delete Handlers[i]; +} + +/// FindHandler - Check to see if there is already a handler for the +/// specified name. If not, return the handler for the null identifier if it +/// exists, otherwise return null. If IgnoreNull is true (the default) then +/// the null handler isn't returned on failure to match. +PragmaHandler *PragmaNamespace::FindHandler(const IdentifierInfo *Name, + bool IgnoreNull) const { + PragmaHandler *NullHandler = 0; + for (unsigned i = 0, e = Handlers.size(); i != e; ++i) { + if (Handlers[i]->getName() == Name) + return Handlers[i]; + + if (Handlers[i]->getName() == 0) + NullHandler = Handlers[i]; + } + return IgnoreNull ? 0 : NullHandler; +} + +void PragmaNamespace::HandlePragma(Preprocessor &PP, LexerToken &Tok) { + // Read the 'namespace' that the directive is in, e.g. STDC. Do not macro + // expand it, the user can have a STDC #define, that should not affect this. + PP.LexUnexpandedToken(Tok); + + // Get the handler for this token. If there is no handler, ignore the pragma. + PragmaHandler *Handler = FindHandler(Tok.getIdentifierInfo(), false); + if (Handler == 0) return; + + // Otherwise, pass it down. 
+ Handler->HandlePragma(PP, Tok); +} + +//===----------------------------------------------------------------------===// +// Preprocessor Pragma Directive Handling. +//===----------------------------------------------------------------------===// + +/// HandlePragmaDirective - The "#pragma" directive has been parsed. Lex the +/// rest of the pragma, passing it to the registered pragma handlers. +void Preprocessor::HandlePragmaDirective() { + ++NumPragma; + + // Invoke the first level of pragma handlers which reads the namespace id. + LexerToken Tok; + PragmaHandlers->HandlePragma(*this, Tok); + + // If the pragma handler didn't read the rest of the line, consume it now. + if (CurLexer->ParsingPreprocessorDirective) + DiscardUntilEndOfDirective(); +} + +/// Handle_Pragma - Read a _Pragma directive, slice it up, process it, then +/// return the first token after the directive. The _Pragma token has just +/// been read into 'Tok'. +void Preprocessor::Handle_Pragma(LexerToken &Tok) { + // Remember the pragma token location. + SourceLocation PragmaLoc = Tok.getLocation(); + + // Read the '('. + Lex(Tok); + if (Tok.getKind() != tok::l_paren) + return Diag(PragmaLoc, diag::err__Pragma_malformed); + + // Read the '"..."'. + Lex(Tok); + if (Tok.getKind() != tok::string_literal && + Tok.getKind() != tok::wide_string_literal) + return Diag(PragmaLoc, diag::err__Pragma_malformed); + + // Remember the string. + std::string StrVal = getSpelling(Tok); + SourceLocation StrLoc = Tok.getLocation(); + + // Read the ')'. + Lex(Tok); + if (Tok.getKind() != tok::r_paren) + return Diag(PragmaLoc, diag::err__Pragma_malformed); + + // The _Pragma is lexically sound. Destringize according to C99 6.10.9.1. + if (StrVal[0] == 'L') // Remove L prefix. 
+ StrVal.erase(StrVal.begin()); + assert(StrVal[0] == '"' && StrVal[StrVal.size()-1] == '"' && + "Invalid string token!"); + + // Remove the front quote, replacing it with a space, so that the pragma + // contents appear to have a space before them. + StrVal[0] = ' '; + + // Replace the terminating quote with a \n\0. + StrVal[StrVal.size()-1] = '\n'; + StrVal += '\0'; + + // Remove escaped quotes and escapes. + for (unsigned i = 0, e = StrVal.size(); i != e-1; ++i) { + if (StrVal[i] == '\\' && + (StrVal[i+1] == '\\' || StrVal[i+1] == '"')) { + // \\ -> '\' and \" -> '"'. + StrVal.erase(StrVal.begin()+i); + --e; + } + } + + // Plop the string (including the newline and trailing null) into a buffer + // where we can lex it. + SourceLocation TokLoc = CreateString(&StrVal[0], StrVal.size(), StrLoc); + const char *StrData = SourceMgr.getCharacterData(TokLoc); + + unsigned FileID = TokLoc.getFileID(); + assert(FileID && "Could not get FileID for _Pragma?"); + + // Make and enter a lexer object so that we lex and expand the tokens just + // like any others. + Lexer *TL = new Lexer(SourceMgr.getBuffer(FileID), FileID, *this, + StrData, StrData+StrVal.size()-1 /* no null */); + + // Ensure that the lexer thinks it is inside a directive, so that end \n will + // return an EOM token. + TL->ParsingPreprocessorDirective = true; + + // This lexer really is for _Pragma. + TL->Is_PragmaLexer = true; + + EnterSourceFileWithLexer(TL, 0); + + // With everything set up, lex this as a #pragma directive. + HandlePragmaDirective(); + + // Finally, return whatever came after the pragma directive. + return Lex(Tok); +} + + + +/// HandlePragmaOnce - Handle #pragma once. OnceTok is the 'once'. +/// +void Preprocessor::HandlePragmaOnce(LexerToken &OnceTok) { + if (isInPrimaryFile()) { + Diag(OnceTok, diag::pp_pragma_once_in_main_file); + return; + } + + // Get the current file lexer we're looking at. Ignore _Pragma 'files' etc. 
+ unsigned FileID = getCurrentFileLexer()->getCurFileID(); + + // Mark the file as a once-only file now. + HeaderInfo.MarkFileIncludeOnce(SourceMgr.getFileEntryForFileID(FileID)); +} + +/// HandlePragmaPoison - Handle #pragma GCC poison. PoisonTok is the 'poison'. +/// +void Preprocessor::HandlePragmaPoison(LexerToken &PoisonTok) { + LexerToken Tok; + + while (1) { + // Read the next token to poison. While doing this, pretend that we are + // skipping while reading the identifier to poison. + // This avoids errors on code like: + // #pragma GCC poison X + // #pragma GCC poison X + if (CurLexer) CurLexer->LexingRawMode = true; + LexUnexpandedToken(Tok); + if (CurLexer) CurLexer->LexingRawMode = false; + + // If we reached the end of line, we're done. + if (Tok.getKind() == tok::eom) return; + + // Can only poison identifiers. + if (Tok.getKind() != tok::identifier) { + Diag(Tok, diag::err_pp_invalid_poison); + return; + } + + // Look up the identifier info for the token. We disabled identifier lookup + // by saying we're skipping contents, so we need to do this manually. + IdentifierInfo *II = LookUpIdentifierInfo(Tok); + + // Already poisoned. + if (II->isPoisoned()) continue; + + // If this is a macro identifier, emit a warning. + if (II->getMacroInfo()) + Diag(Tok, diag::pp_poisoning_existing_macro); + + // Finally, poison it! + II->setIsPoisoned(); + } +} + +/// HandlePragmaSystemHeader - Implement #pragma GCC system_header. We know +/// that the whole directive has been parsed. +void Preprocessor::HandlePragmaSystemHeader(LexerToken &SysHeaderTok) { + if (isInPrimaryFile()) { + Diag(SysHeaderTok, diag::pp_pragma_sysheader_in_main_file); + return; + } + + // Get the current file lexer we're looking at. Ignore _Pragma 'files' etc. + Lexer *TheLexer = getCurrentFileLexer(); + + // Mark the file as a system header. 
+ const FileEntry *File = + SourceMgr.getFileEntryForFileID(TheLexer->getCurFileID()); + HeaderInfo.MarkFileSystemHeader(File); + + // Notify the client, if desired, that we are in a new source file. + if (Callbacks) + Callbacks->FileChanged(TheLexer->getSourceLocation(TheLexer->BufferPtr), + PPCallbacks::SystemHeaderPragma, + DirectoryLookup::SystemHeaderDir); +} + +/// HandlePragmaDependency - Handle #pragma GCC dependency "foo" blah. +/// +void Preprocessor::HandlePragmaDependency(LexerToken &DependencyTok) { + LexerToken FilenameTok; + CurLexer->LexIncludeFilename(FilenameTok); + + // If the token kind is EOM, the error has already been diagnosed. + if (FilenameTok.getKind() == tok::eom) + return; + + // Reserve a buffer to get the spelling. + llvm::SmallVector<char, 128> FilenameBuffer; + FilenameBuffer.resize(FilenameTok.getLength()); + + const char *FilenameStart = &FilenameBuffer[0], *FilenameEnd; + bool isAngled = GetIncludeFilenameSpelling(FilenameTok, + FilenameStart, FilenameEnd); + // If GetIncludeFilenameSpelling set the start ptr to null, there was an + // error. + if (FilenameStart == 0) + return; + + // Search include directories for this file. + const DirectoryLookup *CurDir; + const FileEntry *File = LookupFile(FilenameStart, FilenameEnd, + isAngled, 0, CurDir); + if (File == 0) + return Diag(FilenameTok, diag::err_pp_file_not_found, + std::string(FilenameStart, FilenameEnd)); + + unsigned FileID = getCurrentFileLexer()->getCurFileID(); + const FileEntry *CurFile = SourceMgr.getFileEntryForFileID(FileID); + + // If this file is older than the file it depends on, emit a diagnostic. + if (CurFile && CurFile->getModificationTime() < File->getModificationTime()) { + // Lex tokens at the end of the message and include them in the message. 
+ std::string Message; + Lex(DependencyTok); + while (DependencyTok.getKind() != tok::eom) { + Message += getSpelling(DependencyTok) + " "; + Lex(DependencyTok); + } + + Message.erase(Message.end()-1); + Diag(FilenameTok, diag::pp_out_of_date_dependency, Message); + } +} + + +/// AddPragmaHandler - Add the specified pragma handler to the preprocessor. +/// If 'Namespace' is non-null, then it is a token required to exist on the +/// pragma line before the pragma string starts, e.g. "STDC" or "GCC". +void Preprocessor::AddPragmaHandler(const char *Namespace, + PragmaHandler *Handler) { + PragmaNamespace *InsertNS = PragmaHandlers; + + // If this is specified to be in a namespace, step down into it. + if (Namespace) { + IdentifierInfo *NSID = getIdentifierInfo(Namespace); + + // If there is already a pragma handler with the name of this namespace, + // we either have an error (directive with the same name as a namespace) or + // we already have the namespace to insert into. + if (PragmaHandler *Existing = PragmaHandlers->FindHandler(NSID)) { + InsertNS = Existing->getIfNamespace(); + assert(InsertNS != 0 && "Cannot have a pragma namespace and pragma" + " handler with the same name!"); + } else { + // Otherwise, this namespace doesn't exist yet, create and insert the + // handler for it. + InsertNS = new PragmaNamespace(NSID); + PragmaHandlers->AddPragma(InsertNS); + } + } + + // Check to make sure we don't already have a pragma for this identifier. 
+ assert(!InsertNS->FindHandler(Handler->getName()) && + "Pragma handler already exists for this identifier!"); + InsertNS->AddPragma(Handler); +} + +namespace { +struct PragmaOnceHandler : public PragmaHandler { + PragmaOnceHandler(const IdentifierInfo *OnceID) : PragmaHandler(OnceID) {} + virtual void HandlePragma(Preprocessor &PP, LexerToken &OnceTok) { + PP.CheckEndOfDirective("#pragma once"); + PP.HandlePragmaOnce(OnceTok); + } +}; + +struct PragmaPoisonHandler : public PragmaHandler { + PragmaPoisonHandler(const IdentifierInfo *ID) : PragmaHandler(ID) {} + virtual void HandlePragma(Preprocessor &PP, LexerToken &PoisonTok) { + PP.HandlePragmaPoison(PoisonTok); + } +}; + +struct PragmaSystemHeaderHandler : public PragmaHandler { + PragmaSystemHeaderHandler(const IdentifierInfo *ID) : PragmaHandler(ID) {} + virtual void HandlePragma(Preprocessor &PP, LexerToken &SHToken) { + PP.HandlePragmaSystemHeader(SHToken); + PP.CheckEndOfDirective("#pragma"); + } +}; +struct PragmaDependencyHandler : public PragmaHandler { + PragmaDependencyHandler(const IdentifierInfo *ID) : PragmaHandler(ID) {} + virtual void HandlePragma(Preprocessor &PP, LexerToken &DepToken) { + PP.HandlePragmaDependency(DepToken); + } +}; +} // end anonymous namespace + + +/// RegisterBuiltinPragmas - Install the standard preprocessor pragmas: +/// #pragma GCC poison/system_header/dependency and #pragma once. 
+void Preprocessor::RegisterBuiltinPragmas() { + AddPragmaHandler(0, new PragmaOnceHandler(getIdentifierInfo("once"))); + AddPragmaHandler("GCC", new PragmaPoisonHandler(getIdentifierInfo("poison"))); + AddPragmaHandler("GCC", new PragmaSystemHeaderHandler( + getIdentifierInfo("system_header"))); + AddPragmaHandler("GCC", new PragmaDependencyHandler( + getIdentifierInfo("dependency"))); +} diff --git a/Lex/Preprocessor.cpp b/Lex/Preprocessor.cpp new file mode 100644 index 0000000000..104fb65152 --- /dev/null +++ b/Lex/Preprocessor.cpp @@ -0,0 +1,2087 @@ +//===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Preprocessor interface. +// +//===----------------------------------------------------------------------===// +// +// Options to support: +// -H - Print the name of each header file used. +// -d[MDNI] - Dump various things. +// -fworking-directory - #line's with preprocessor's working dir. 
+// -fpreprocessed +// -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD +// -W* +// -w +// +// Messages to emit: +// "Multiple include guards may be useful for:\n" +// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/Preprocessor.h" +#include "clang/Lex/HeaderSearch.h" +#include "clang/Lex/MacroInfo.h" +#include "clang/Lex/PPCallbacks.h" +#include "clang/Lex/Pragma.h" +#include "clang/Lex/ScratchBuffer.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/FileManager.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/TargetInfo.h" +#include "llvm/ADT/SmallVector.h" +#include <iostream> +using namespace clang; + +//===----------------------------------------------------------------------===// + +Preprocessor::Preprocessor(Diagnostic &diags, const LangOptions &opts, + TargetInfo &target, SourceManager &SM, + HeaderSearch &Headers) + : Diags(diags), Features(opts), Target(target), FileMgr(Headers.getFileMgr()), + SourceMgr(SM), HeaderInfo(Headers), Identifiers(opts), + CurLexer(0), CurDirLookup(0), CurMacroExpander(0), Callbacks(0) { + ScratchBuf = new ScratchBuffer(SourceMgr); + + // Clear stats. + NumDirectives = NumDefined = NumUndefined = NumPragma = 0; + NumIf = NumElse = NumEndif = 0; + NumEnteredSourceFiles = 0; + NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0; + NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0; + MaxIncludeStackDepth = 0; + NumSkipped = 0; + + // Default to discarding comments. + KeepComments = false; + KeepMacroComments = false; + + // Macro expansion is enabled. + DisableMacroExpansion = false; + InMacroArgs = false; + + // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro. + // This gets unpoisoned where it is allowed. + (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned(); + + // Initialize the pragma handlers. 
+ PragmaHandlers = new PragmaNamespace(0); + RegisterBuiltinPragmas(); + + // Initialize builtin macros like __LINE__ and friends. + RegisterBuiltinMacros(); +} + +Preprocessor::~Preprocessor() { + // Free any active lexers. + delete CurLexer; + + while (!IncludeMacroStack.empty()) { + delete IncludeMacroStack.back().TheLexer; + delete IncludeMacroStack.back().TheMacroExpander; + IncludeMacroStack.pop_back(); + } + + // Release pragma information. + delete PragmaHandlers; + + // Delete the scratch buffer info. + delete ScratchBuf; +} + +PPCallbacks::~PPCallbacks() { +} + +/// Diag - Forwarding function for diagnostics. This emits a diagnostic at +/// the specified LexerToken's location, translating the token's start +/// position in the current buffer into a SourcePosition object for rendering. +void Preprocessor::Diag(SourceLocation Loc, unsigned DiagID) { + Diags.Report(Loc, DiagID); +} + +void Preprocessor::Diag(SourceLocation Loc, unsigned DiagID, + const std::string &Msg) { + Diags.Report(Loc, DiagID, &Msg, 1); +} + +void Preprocessor::DumpToken(const LexerToken &Tok, bool DumpFlags) const { + std::cerr << tok::getTokenName(Tok.getKind()) << " '" + << getSpelling(Tok) << "'"; + + if (!DumpFlags) return; + std::cerr << "\t"; + if (Tok.isAtStartOfLine()) + std::cerr << " [StartOfLine]"; + if (Tok.hasLeadingSpace()) + std::cerr << " [LeadingSpace]"; + if (Tok.isExpandDisabled()) + std::cerr << " [ExpandDisabled]"; + if (Tok.needsCleaning()) { + const char *Start = SourceMgr.getCharacterData(Tok.getLocation()); + std::cerr << " [UnClean='" << std::string(Start, Start+Tok.getLength()) + << "']"; + } +} + +void Preprocessor::DumpMacro(const MacroInfo &MI) const { + std::cerr << "MACRO: "; + for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) { + DumpToken(MI.getReplacementToken(i)); + std::cerr << " "; + } + std::cerr << "\n"; +} + +void Preprocessor::PrintStats() { + std::cerr << "\n*** Preprocessor Stats:\n"; + std::cerr << NumDirectives << " directives 
found:\n"; + std::cerr << " " << NumDefined << " #define.\n"; + std::cerr << " " << NumUndefined << " #undef.\n"; + std::cerr << " #include/#include_next/#import:\n"; + std::cerr << " " << NumEnteredSourceFiles << " source files entered.\n"; + std::cerr << " " << MaxIncludeStackDepth << " max include stack depth\n"; + std::cerr << " " << NumIf << " #if/#ifndef/#ifdef.\n"; + std::cerr << " " << NumElse << " #else/#elif.\n"; + std::cerr << " " << NumEndif << " #endif.\n"; + std::cerr << " " << NumPragma << " #pragma.\n"; + std::cerr << NumSkipped << " #if/#ifndef#ifdef regions skipped\n"; + + std::cerr << NumMacroExpanded << "/" << NumFnMacroExpanded << "/" + << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, " + << NumFastMacroExpanded << " on the fast path.\n"; + std::cerr << (NumFastTokenPaste+NumTokenPaste) + << " token paste (##) operations performed, " + << NumFastTokenPaste << " on the fast path.\n"; +} + +//===----------------------------------------------------------------------===// +// Token Spelling +//===----------------------------------------------------------------------===// + + +/// getSpelling() - Return the 'spelling' of this token. The spelling of a +/// token are the characters used to represent the token in the source file +/// after trigraph expansion and escaped-newline folding. In particular, this +/// wants to get the true, uncanonicalized, spelling of things like digraphs +/// UCNs, etc. +std::string Preprocessor::getSpelling(const LexerToken &Tok) const { + assert((int)Tok.getLength() >= 0 && "Token character range is bogus!"); + + // If this token contains nothing interesting, return it directly. + const char *TokStart = SourceMgr.getCharacterData(Tok.getLocation()); + if (!Tok.needsCleaning()) + return std::string(TokStart, TokStart+Tok.getLength()); + + std::string Result; + Result.reserve(Tok.getLength()); + + // Otherwise, hard case, relex the characters into the string. 
+ for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength(); + Ptr != End; ) { + unsigned CharSize; + Result.push_back(Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features)); + Ptr += CharSize; + } + assert(Result.size() != unsigned(Tok.getLength()) && + "NeedsCleaning flag set on something that didn't need cleaning!"); + return Result; +} + +/// getSpelling - This method is used to get the spelling of a token into a +/// preallocated buffer, instead of as an std::string. The caller is required +/// to allocate enough space for the token, which is guaranteed to be at least +/// Tok.getLength() bytes long. The actual length of the token is returned. +/// +/// Note that this method may do two possible things: it may either fill in +/// the buffer specified with characters, or it may *change the input pointer* +/// to point to a constant buffer with the data already in it (avoiding a +/// copy). The caller is not allowed to modify the returned buffer pointer +/// if an internal buffer is returned. +unsigned Preprocessor::getSpelling(const LexerToken &Tok, + const char *&Buffer) const { + assert((int)Tok.getLength() >= 0 && "Token character range is bogus!"); + + // If this token is an identifier, just return the string from the identifier + // table, which is very quick. + if (const IdentifierInfo *II = Tok.getIdentifierInfo()) { + Buffer = II->getName(); + return Tok.getLength(); + } + + // Otherwise, compute the start of the token in the input lexer buffer. + const char *TokStart = SourceMgr.getCharacterData(Tok.getLocation()); + + // If this token contains nothing interesting, return it directly. + if (!Tok.needsCleaning()) { + Buffer = TokStart; + return Tok.getLength(); + } + // Otherwise, hard case, relex the characters into the string. 
+ char *OutBuf = const_cast<char*>(Buffer); + for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength(); + Ptr != End; ) { + unsigned CharSize; + *OutBuf++ = Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features); + Ptr += CharSize; + } + assert(unsigned(OutBuf-Buffer) != Tok.getLength() && + "NeedsCleaning flag set on something that didn't need cleaning!"); + + return OutBuf-Buffer; +} + + +/// CreateString - Plop the specified string into a scratch buffer and return a +/// location for it. If specified, the source location provides a source +/// location for the token. +SourceLocation Preprocessor:: +CreateString(const char *Buf, unsigned Len, SourceLocation SLoc) { + if (SLoc.isValid()) + return ScratchBuf->getToken(Buf, Len, SLoc); + return ScratchBuf->getToken(Buf, Len); +} + + +//===----------------------------------------------------------------------===// +// Source File Location Methods. +//===----------------------------------------------------------------------===// + +/// LookupFile - Given a "foo" or <foo> reference, look up the indicated file, +/// return null on failure. isAngled indicates whether the file reference is +/// for system #include's or not (i.e. using <> instead of ""). +const FileEntry *Preprocessor::LookupFile(const char *FilenameStart, + const char *FilenameEnd, + bool isAngled, + const DirectoryLookup *FromDir, + const DirectoryLookup *&CurDir) { + // If the header lookup mechanism may be relative to the current file, pass in + // info about where the current file is. + const FileEntry *CurFileEnt = 0; + if (!FromDir) { + unsigned TheFileID = getCurrentFileLexer()->getCurFileID(); + CurFileEnt = SourceMgr.getFileEntryForFileID(TheFileID); + } + + // Do a standard file entry lookup. + CurDir = CurDirLookup; + const FileEntry *FE = + HeaderInfo.LookupFile(FilenameStart, FilenameEnd, + isAngled, FromDir, CurDir, CurFileEnt); + if (FE) return FE; + + // Otherwise, see if this is a subframework header. 
If so, this is relative + // to one of the headers on the #include stack. Walk the list of the current + // headers on the #include stack and pass them to HeaderInfo. + if (CurLexer && !CurLexer->Is_PragmaLexer) { + CurFileEnt = SourceMgr.getFileEntryForFileID(CurLexer->getCurFileID()); + if ((FE = HeaderInfo.LookupSubframeworkHeader(FilenameStart, FilenameEnd, + CurFileEnt))) + return FE; + } + + for (unsigned i = 0, e = IncludeMacroStack.size(); i != e; ++i) { + IncludeStackInfo &ISEntry = IncludeMacroStack[e-i-1]; + if (ISEntry.TheLexer && !ISEntry.TheLexer->Is_PragmaLexer) { + CurFileEnt = + SourceMgr.getFileEntryForFileID(ISEntry.TheLexer->getCurFileID()); + if ((FE = HeaderInfo.LookupSubframeworkHeader(FilenameStart, FilenameEnd, + CurFileEnt))) + return FE; + } + } + + // Otherwise, we really couldn't find the file. + return 0; +} + +/// isInPrimaryFile - Return true if we're in the top-level file, not in a +/// #include. +bool Preprocessor::isInPrimaryFile() const { + if (CurLexer && !CurLexer->Is_PragmaLexer) + return CurLexer->isMainFile(); + + // If there are any stacked lexers, we're in a #include. + for (unsigned i = 0, e = IncludeMacroStack.size(); i != e; ++i) + if (IncludeMacroStack[i].TheLexer && + !IncludeMacroStack[i].TheLexer->Is_PragmaLexer) + return IncludeMacroStack[i].TheLexer->isMainFile(); + return false; +} + +/// getCurrentLexer - Return the current file lexer being lexed from. Note +/// that this ignores any potentially active macro expansions and _Pragma +/// expansions going on at the time. +Lexer *Preprocessor::getCurrentFileLexer() const { + if (CurLexer && !CurLexer->Is_PragmaLexer) return CurLexer; + + // Look for a stacked lexer. + for (unsigned i = IncludeMacroStack.size(); i != 0; --i) { + Lexer *L = IncludeMacroStack[i-1].TheLexer; + if (L && !L->Is_PragmaLexer) // Ignore macro & _Pragma expansions. 
+ return L; + } + return 0; +} + + +/// EnterSourceFile - Add a source file to the top of the include stack and +/// start lexing tokens from it instead of the current buffer. Return true +/// on failure. +void Preprocessor::EnterSourceFile(unsigned FileID, + const DirectoryLookup *CurDir, + bool isMainFile) { + assert(CurMacroExpander == 0 && "Cannot #include a file inside a macro!"); + ++NumEnteredSourceFiles; + + if (MaxIncludeStackDepth < IncludeMacroStack.size()) + MaxIncludeStackDepth = IncludeMacroStack.size(); + + const llvm::MemoryBuffer *Buffer = SourceMgr.getBuffer(FileID); + Lexer *TheLexer = new Lexer(Buffer, FileID, *this); + if (isMainFile) TheLexer->setIsMainFile(); + EnterSourceFileWithLexer(TheLexer, CurDir); +} + +/// EnterSourceFile - Add a source file to the top of the include stack and +/// start lexing tokens from it instead of the current buffer. +void Preprocessor::EnterSourceFileWithLexer(Lexer *TheLexer, + const DirectoryLookup *CurDir) { + + // Add the current lexer to the include stack. + if (CurLexer || CurMacroExpander) + IncludeMacroStack.push_back(IncludeStackInfo(CurLexer, CurDirLookup, + CurMacroExpander)); + + CurLexer = TheLexer; + CurDirLookup = CurDir; + CurMacroExpander = 0; + + // Notify the client, if desired, that we are in a new source file. + if (Callbacks && !CurLexer->Is_PragmaLexer) { + DirectoryLookup::DirType FileType = DirectoryLookup::NormalHeaderDir; + + // Get the file entry for the current file. + if (const FileEntry *FE = + SourceMgr.getFileEntryForFileID(CurLexer->getCurFileID())) + FileType = HeaderInfo.getFileDirFlavor(FE); + + Callbacks->FileChanged(SourceLocation(CurLexer->getCurFileID(), 0), + PPCallbacks::EnterFile, FileType); + } +} + + + +/// EnterMacro - Add a Macro to the top of the include stack and start lexing +/// tokens from it instead of the current buffer. 
+void Preprocessor::EnterMacro(LexerToken &Tok, MacroArgs *Args) { + IncludeMacroStack.push_back(IncludeStackInfo(CurLexer, CurDirLookup, + CurMacroExpander)); + CurLexer = 0; + CurDirLookup = 0; + + CurMacroExpander = new MacroExpander(Tok, Args, *this); +} + +/// EnterTokenStream - Add a "macro" context to the top of the include stack, +/// which will cause the lexer to start returning the specified tokens. Note +/// that these tokens will be re-macro-expanded when/if expansion is enabled. +/// This method assumes that the specified stream of tokens has a permanent +/// owner somewhere, so they do not need to be copied. +void Preprocessor::EnterTokenStream(const LexerToken *Toks, unsigned NumToks) { + // Save our current state. + IncludeMacroStack.push_back(IncludeStackInfo(CurLexer, CurDirLookup, + CurMacroExpander)); + CurLexer = 0; + CurDirLookup = 0; + + // Create a macro expander to expand from the specified token stream. + CurMacroExpander = new MacroExpander(Toks, NumToks, *this); +} + +/// RemoveTopOfLexerStack - Pop the current lexer/macro exp off the top of the +/// lexer stack. This should only be used in situations where the current +/// state of the top-of-stack lexer is known. +void Preprocessor::RemoveTopOfLexerStack() { + assert(!IncludeMacroStack.empty() && "Ran out of stack entries to load"); + delete CurLexer; + delete CurMacroExpander; + CurLexer = IncludeMacroStack.back().TheLexer; + CurDirLookup = IncludeMacroStack.back().TheDirLookup; + CurMacroExpander = IncludeMacroStack.back().TheMacroExpander; + IncludeMacroStack.pop_back(); +} + +//===----------------------------------------------------------------------===// +// Macro Expansion Handling. +//===----------------------------------------------------------------------===// + +/// RegisterBuiltinMacro - Register the specified identifier in the identifier +/// table and mark it as a builtin macro to be expanded. 
+IdentifierInfo *Preprocessor::RegisterBuiltinMacro(const char *Name) { + // Get the identifier. + IdentifierInfo *Id = getIdentifierInfo(Name); + + // Mark it as being a macro that is builtin. + MacroInfo *MI = new MacroInfo(SourceLocation()); + MI->setIsBuiltinMacro(); + Id->setMacroInfo(MI); + return Id; +} + + +/// RegisterBuiltinMacros - Register builtin macros, such as __LINE__ with the +/// identifier table. +void Preprocessor::RegisterBuiltinMacros() { + Ident__LINE__ = RegisterBuiltinMacro("__LINE__"); + Ident__FILE__ = RegisterBuiltinMacro("__FILE__"); + Ident__DATE__ = RegisterBuiltinMacro("__DATE__"); + Ident__TIME__ = RegisterBuiltinMacro("__TIME__"); + Ident_Pragma = RegisterBuiltinMacro("_Pragma"); + + // GCC Extensions. + Ident__BASE_FILE__ = RegisterBuiltinMacro("__BASE_FILE__"); + Ident__INCLUDE_LEVEL__ = RegisterBuiltinMacro("__INCLUDE_LEVEL__"); + Ident__TIMESTAMP__ = RegisterBuiltinMacro("__TIMESTAMP__"); +} + +/// isTrivialSingleTokenExpansion - Return true if MI, which has a single token +/// in its expansion, currently expands to that token literally. +static bool isTrivialSingleTokenExpansion(const MacroInfo *MI, + const IdentifierInfo *MacroIdent) { + IdentifierInfo *II = MI->getReplacementToken(0).getIdentifierInfo(); + + // If the token isn't an identifier, it's always literally expanded. + if (II == 0) return true; + + // If the identifier is a macro, and if that macro is enabled, it may be + // expanded so it's not a trivial expansion. + if (II->getMacroInfo() && II->getMacroInfo()->isEnabled() && + // Fast expanding "#define X X" is ok, because X would be disabled. + II != MacroIdent) + return false; + + // If this is an object-like macro invocation, it is safe to trivially expand + // it. + if (MI->isObjectLike()) return true; + + // If this is a function-like macro invocation, it's safe to trivially expand + // as long as the identifier is not a macro argument. 
+ for (MacroInfo::arg_iterator I = MI->arg_begin(), E = MI->arg_end(); + I != E; ++I) + if (*I == II) + return false; // Identifier is a macro argument. + + return true; +} + + +/// isNextPPTokenLParen - Determine whether the next preprocessor token to be +/// lexed is a '('. If so, consume the token and return true, if not, this +/// method should have no observable side-effect on the lexed tokens. +bool Preprocessor::isNextPPTokenLParen() { + // Do some quick tests for rejection cases. + unsigned Val; + if (CurLexer) + Val = CurLexer->isNextPPTokenLParen(); + else + Val = CurMacroExpander->isNextTokenLParen(); + + if (Val == 2) { + // If we ran off the end of the lexer or macro expander, walk the include + // stack, looking for whatever will return the next token. + for (unsigned i = IncludeMacroStack.size(); Val == 2 && i != 0; --i) { + IncludeStackInfo &Entry = IncludeMacroStack[i-1]; + if (Entry.TheLexer) + Val = Entry.TheLexer->isNextPPTokenLParen(); + else + Val = Entry.TheMacroExpander->isNextTokenLParen(); + } + } + + // Okay, if we know that the token is a '(', lex it and return. Otherwise we + // have found something that isn't a '(' or we found the end of the + // translation unit. In either case, return false. + if (Val != 1) + return false; + + LexerToken Tok; + LexUnexpandedToken(Tok); + assert(Tok.getKind() == tok::l_paren && "Error computing l-paren-ness?"); + return true; +} + +/// HandleMacroExpandedIdentifier - If an identifier token is read that is to be +/// expanded as a macro, handle it and return the next token as 'Identifier'. +bool Preprocessor::HandleMacroExpandedIdentifier(LexerToken &Identifier, + MacroInfo *MI) { + + // If this is a builtin macro, like __LINE__ or _Pragma, handle it specially. + if (MI->isBuiltinMacro()) { + ExpandBuiltinMacro(Identifier); + return false; + } + + // If this is the first use of a target-specific macro, warn about it. 
+ if (MI->isTargetSpecific()) { + MI->setIsTargetSpecific(false); // Don't warn on second use. + getTargetInfo().DiagnoseNonPortability(Identifier.getLocation(), + diag::port_target_macro_use); + } + + /// Args - If this is a function-like macro expansion, this contains, + /// for each macro argument, the list of tokens that were provided to the + /// invocation. + MacroArgs *Args = 0; + + // If this is a function-like macro, read the arguments. + if (MI->isFunctionLike()) { + // C99 6.10.3p10: If the preprocessing token immediately after the the macro + // name isn't a '(', this macro should not be expanded. + if (!isNextPPTokenLParen()) + return true; + + // Remember that we are now parsing the arguments to a macro invocation. + // Preprocessor directives used inside macro arguments are not portable, and + // this enables the warning. + InMacroArgs = true; + Args = ReadFunctionLikeMacroArgs(Identifier, MI); + + // Finished parsing args. + InMacroArgs = false; + + // If there was an error parsing the arguments, bail out. + if (Args == 0) return false; + + ++NumFnMacroExpanded; + } else { + ++NumMacroExpanded; + } + + // Notice that this macro has been used. + MI->setIsUsed(true); + + // If we started lexing a macro, enter the macro expansion body. + + // If this macro expands to no tokens, don't bother to push it onto the + // expansion stack, only to take it right back off. + if (MI->getNumTokens() == 0) { + // No need for arg info. + if (Args) Args->destroy(); + + // Ignore this macro use, just return the next token in the current + // buffer. + bool HadLeadingSpace = Identifier.hasLeadingSpace(); + bool IsAtStartOfLine = Identifier.isAtStartOfLine(); + + Lex(Identifier); + + // If the identifier isn't on some OTHER line, inherit the leading + // whitespace/first-on-a-line property of this token. This handles + // stuff like "! XX," -> "! ," and " XX," -> " ,", when XX is + // empty. 
+ if (!Identifier.isAtStartOfLine()) { + if (IsAtStartOfLine) Identifier.setFlag(LexerToken::StartOfLine); + if (HadLeadingSpace) Identifier.setFlag(LexerToken::LeadingSpace); + } + ++NumFastMacroExpanded; + return false; + + } else if (MI->getNumTokens() == 1 && + isTrivialSingleTokenExpansion(MI, Identifier.getIdentifierInfo())){ + // Otherwise, if this macro expands into a single trivially-expanded + // token: expand it now. This handles common cases like + // "#define VAL 42". + + // Propagate the isAtStartOfLine/hasLeadingSpace markers of the macro + // identifier to the expanded token. + bool isAtStartOfLine = Identifier.isAtStartOfLine(); + bool hasLeadingSpace = Identifier.hasLeadingSpace(); + + // Remember where the token is instantiated. + SourceLocation InstantiateLoc = Identifier.getLocation(); + + // Replace the result token. + Identifier = MI->getReplacementToken(0); + + // Restore the StartOfLine/LeadingSpace markers. + Identifier.setFlagValue(LexerToken::StartOfLine , isAtStartOfLine); + Identifier.setFlagValue(LexerToken::LeadingSpace, hasLeadingSpace); + + // Update the tokens location to include both its logical and physical + // locations. + SourceLocation Loc = + SourceMgr.getInstantiationLoc(Identifier.getLocation(), InstantiateLoc); + Identifier.setLocation(Loc); + + // If this is #define X X, we must mark the result as unexpandible. + if (IdentifierInfo *NewII = Identifier.getIdentifierInfo()) + if (NewII->getMacroInfo() == MI) + Identifier.setFlag(LexerToken::DisableExpand); + + // Since this is not an identifier token, it can't be macro expanded, so + // we're done. + ++NumFastMacroExpanded; + return false; + } + + // Start expanding the macro. + EnterMacro(Identifier, Args); + + // Now that the macro is at the top of the include stack, ask the + // preprocessor to read the next token from it. 
+ Lex(Identifier); + return false; +} + +/// ReadFunctionLikeMacroArgs - After reading "MACRO(", this method is +/// invoked to read all of the actual arguments specified for the macro +/// invocation. This returns null on error. +MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(LexerToken &MacroName, + MacroInfo *MI) { + // The number of fixed arguments to parse. + unsigned NumFixedArgsLeft = MI->getNumArgs(); + bool isVariadic = MI->isVariadic(); + + // Outer loop, while there are more arguments, keep reading them. + LexerToken Tok; + Tok.setKind(tok::comma); + --NumFixedArgsLeft; // Start reading the first arg. + + // ArgTokens - Build up a list of tokens that make up each argument. Each + // argument is separated by an EOF token. Use a SmallVector so we can avoid + // heap allocations in the common case. + llvm::SmallVector<LexerToken, 64> ArgTokens; + + unsigned NumActuals = 0; + while (Tok.getKind() == tok::comma) { + // C99 6.10.3p11: Keep track of the number of l_parens we have seen. + unsigned NumParens = 0; + + while (1) { + // Read arguments as unexpanded tokens. This avoids issues, e.g., where + // an argument value in a macro could expand to ',' or '(' or ')'. + LexUnexpandedToken(Tok); + + if (Tok.getKind() == tok::eof) { + Diag(MacroName, diag::err_unterm_macro_invoc); + // Do not lose the EOF. Return it to the client. + MacroName = Tok; + return 0; + } else if (Tok.getKind() == tok::r_paren) { + // If we found the ) token, the macro arg list is done. + if (NumParens-- == 0) + break; + } else if (Tok.getKind() == tok::l_paren) { + ++NumParens; + } else if (Tok.getKind() == tok::comma && NumParens == 0) { + // Comma ends this argument if there are more fixed arguments expected. + if (NumFixedArgsLeft) + break; + + // If this is not a variadic macro, too many args were specified. + if (!isVariadic) { + // Emit the diagnostic at the macro name in case there is a missing ). + // Emitting it at the , could be far away from the macro name. 
+ Diag(MacroName, diag::err_too_many_args_in_macro_invoc); + return 0; + } + // Otherwise, continue to add the tokens to this variable argument. + } else if (Tok.getKind() == tok::comment && !KeepMacroComments) { + // If this is a comment token in the argument list and we're just in + // -C mode (not -CC mode), discard the comment. + continue; + } + + ArgTokens.push_back(Tok); + } + + // Empty arguments are standard in C99 and supported as an extension in + // other modes. + if (ArgTokens.empty() && !Features.C99) + Diag(Tok, diag::ext_empty_fnmacro_arg); + + // Add a marker EOF token to the end of the token list for this argument. + LexerToken EOFTok; + EOFTok.startToken(); + EOFTok.setKind(tok::eof); + EOFTok.setLocation(Tok.getLocation()); + EOFTok.setLength(0); + ArgTokens.push_back(EOFTok); + ++NumActuals; + --NumFixedArgsLeft; + }; + + // Okay, we either found the r_paren. Check to see if we parsed too few + // arguments. + unsigned MinArgsExpected = MI->getNumArgs(); + + // See MacroArgs instance var for description of this. + bool isVarargsElided = false; + + if (NumActuals < MinArgsExpected) { + // There are several cases where too few arguments is ok, handle them now. + if (NumActuals+1 == MinArgsExpected && MI->isVariadic()) { + // Varargs where the named vararg parameter is missing: ok as extension. + // #define A(x, ...) + // A("blah") + Diag(Tok, diag::ext_missing_varargs_arg); + + // Remember this occurred if this is a C99 macro invocation with at least + // one actual argument. + isVarargsElided = MI->isC99Varargs() && MI->getNumArgs() > 1; + } else if (MI->getNumArgs() == 1) { + // #define A(x) + // A() + // is ok because it is an empty argument. + + // Empty arguments are standard in C99 and supported as an extension in + // other modes. + if (ArgTokens.empty() && !Features.C99) + Diag(Tok, diag::ext_empty_fnmacro_arg); + } else { + // Otherwise, emit the error. 
+ Diag(Tok, diag::err_too_few_args_in_macro_invoc); + return 0; + } + + // Add a marker EOF token to the end of the token list for this argument. + SourceLocation EndLoc = Tok.getLocation(); + Tok.startToken(); + Tok.setKind(tok::eof); + Tok.setLocation(EndLoc); + Tok.setLength(0); + ArgTokens.push_back(Tok); + } + + return MacroArgs::create(MI, &ArgTokens[0], ArgTokens.size(),isVarargsElided); +} + +/// ComputeDATE_TIME - Compute the current time, enter it into the specified +/// scratch buffer, then return DATELoc/TIMELoc locations with the position of +/// the identifier tokens inserted. +static void ComputeDATE_TIME(SourceLocation &DATELoc, SourceLocation &TIMELoc, + Preprocessor &PP) { + time_t TT = time(0); + struct tm *TM = localtime(&TT); + + static const char * const Months[] = { + "Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec" + }; + + char TmpBuffer[100]; + sprintf(TmpBuffer, "\"%s %2d %4d\"", Months[TM->tm_mon], TM->tm_mday, + TM->tm_year+1900); + DATELoc = PP.CreateString(TmpBuffer, strlen(TmpBuffer)); + + sprintf(TmpBuffer, "\"%02d:%02d:%02d\"", TM->tm_hour, TM->tm_min, TM->tm_sec); + TIMELoc = PP.CreateString(TmpBuffer, strlen(TmpBuffer)); +} + +/// ExpandBuiltinMacro - If an identifier token is read that is to be expanded +/// as a builtin macro, handle it and return the next token as 'Tok'. +void Preprocessor::ExpandBuiltinMacro(LexerToken &Tok) { + // Figure out which token this is. + IdentifierInfo *II = Tok.getIdentifierInfo(); + assert(II && "Can't be a macro without id info!"); + + // If this is an _Pragma directive, expand it, invoke the pragma handler, then + // lex the token after it. + if (II == Ident_Pragma) + return Handle_Pragma(Tok); + + ++NumBuiltinMacroExpanded; + + char TmpBuffer[100]; + + // Set up the return result. + Tok.setIdentifierInfo(0); + Tok.clearFlag(LexerToken::NeedsCleaning); + + if (II == Ident__LINE__) { + // __LINE__ expands to a simple numeric value. 
+ sprintf(TmpBuffer, "%u", SourceMgr.getLineNumber(Tok.getLocation())); + unsigned Length = strlen(TmpBuffer); + Tok.setKind(tok::numeric_constant); + Tok.setLength(Length); + Tok.setLocation(CreateString(TmpBuffer, Length, Tok.getLocation())); + } else if (II == Ident__FILE__ || II == Ident__BASE_FILE__) { + SourceLocation Loc = Tok.getLocation(); + if (II == Ident__BASE_FILE__) { + Diag(Tok, diag::ext_pp_base_file); + SourceLocation NextLoc = SourceMgr.getIncludeLoc(Loc.getFileID()); + while (NextLoc.getFileID() != 0) { + Loc = NextLoc; + NextLoc = SourceMgr.getIncludeLoc(Loc.getFileID()); + } + } + + // Escape this filename. Turn '\' -> '\\' '"' -> '\"' + std::string FN = SourceMgr.getSourceName(Loc); + FN = '"' + Lexer::Stringify(FN) + '"'; + Tok.setKind(tok::string_literal); + Tok.setLength(FN.size()); + Tok.setLocation(CreateString(&FN[0], FN.size(), Tok.getLocation())); + } else if (II == Ident__DATE__) { + if (!DATELoc.isValid()) + ComputeDATE_TIME(DATELoc, TIMELoc, *this); + Tok.setKind(tok::string_literal); + Tok.setLength(strlen("\"Mmm dd yyyy\"")); + Tok.setLocation(SourceMgr.getInstantiationLoc(DATELoc, Tok.getLocation())); + } else if (II == Ident__TIME__) { + if (!TIMELoc.isValid()) + ComputeDATE_TIME(DATELoc, TIMELoc, *this); + Tok.setKind(tok::string_literal); + Tok.setLength(strlen("\"hh:mm:ss\"")); + Tok.setLocation(SourceMgr.getInstantiationLoc(TIMELoc, Tok.getLocation())); + } else if (II == Ident__INCLUDE_LEVEL__) { + Diag(Tok, diag::ext_pp_include_level); + + // Compute the include depth of this token. + unsigned Depth = 0; + SourceLocation Loc = SourceMgr.getIncludeLoc(Tok.getLocation().getFileID()); + for (; Loc.getFileID() != 0; ++Depth) + Loc = SourceMgr.getIncludeLoc(Loc.getFileID()); + + // __INCLUDE_LEVEL__ expands to a simple numeric value. 
+ sprintf(TmpBuffer, "%u", Depth); + unsigned Length = strlen(TmpBuffer); + Tok.setKind(tok::numeric_constant); + Tok.setLength(Length); + Tok.setLocation(CreateString(TmpBuffer, Length, Tok.getLocation())); + } else if (II == Ident__TIMESTAMP__) { + // MSVC, ICC, GCC, VisualAge C++ extension. The generated string should be + // of the form "Ddd Mmm dd hh::mm::ss yyyy", which is returned by asctime. + Diag(Tok, diag::ext_pp_timestamp); + + // Get the file that we are lexing out of. If we're currently lexing from + // a macro, dig into the include stack. + const FileEntry *CurFile = 0; + Lexer *TheLexer = getCurrentFileLexer(); + + if (TheLexer) + CurFile = SourceMgr.getFileEntryForFileID(TheLexer->getCurFileID()); + + // If this file is older than the file it depends on, emit a diagnostic. + const char *Result; + if (CurFile) { + time_t TT = CurFile->getModificationTime(); + struct tm *TM = localtime(&TT); + Result = asctime(TM); + } else { + Result = "??? ??? ?? ??:??:?? ????\n"; + } + TmpBuffer[0] = '"'; + strcpy(TmpBuffer+1, Result); + unsigned Len = strlen(TmpBuffer); + TmpBuffer[Len-1] = '"'; // Replace the newline with a quote. + Tok.setKind(tok::string_literal); + Tok.setLength(Len); + Tok.setLocation(CreateString(TmpBuffer, Len, Tok.getLocation())); + } else { + assert(0 && "Unknown identifier!"); + } +} + +//===----------------------------------------------------------------------===// +// Lexer Event Handling. +//===----------------------------------------------------------------------===// + +/// LookUpIdentifierInfo - Given a tok::identifier token, look up the +/// identifier information for the token and install it into the token. 
+IdentifierInfo *Preprocessor::LookUpIdentifierInfo(LexerToken &Identifier, + const char *BufPtr) { + assert(Identifier.getKind() == tok::identifier && "Not an identifier!"); + assert(Identifier.getIdentifierInfo() == 0 && "Identinfo already exists!"); + + // Look up this token, see if it is a macro, or if it is a language keyword. + IdentifierInfo *II; + if (BufPtr && !Identifier.needsCleaning()) { + // No cleaning needed, just use the characters from the lexed buffer. + II = getIdentifierInfo(BufPtr, BufPtr+Identifier.getLength()); + } else { + // Cleaning needed, alloca a buffer, clean into it, then use the buffer. + const char *TmpBuf = (char*)alloca(Identifier.getLength()); + unsigned Size = getSpelling(Identifier, TmpBuf); + II = getIdentifierInfo(TmpBuf, TmpBuf+Size); + } + Identifier.setIdentifierInfo(II); + return II; +} + + +/// HandleIdentifier - This callback is invoked when the lexer reads an +/// identifier. This callback looks up the identifier in the map and/or +/// potentially macro expands it or turns it into a named token (like 'for'). +void Preprocessor::HandleIdentifier(LexerToken &Identifier) { + assert(Identifier.getIdentifierInfo() && + "Can't handle identifiers without identifier info!"); + + IdentifierInfo &II = *Identifier.getIdentifierInfo(); + + // If this identifier was poisoned, and if it was not produced from a macro + // expansion, emit an error. + if (II.isPoisoned() && CurLexer) { + if (&II != Ident__VA_ARGS__) // We warn about __VA_ARGS__ with poisoning. + Diag(Identifier, diag::err_pp_used_poisoned_id); + else + Diag(Identifier, diag::ext_pp_bad_vaargs_use); + } + + // If this is a macro to be expanded, do it. 
+ if (MacroInfo *MI = II.getMacroInfo()) { + if (!DisableMacroExpansion && !Identifier.isExpandDisabled()) { + if (MI->isEnabled()) { + if (!HandleMacroExpandedIdentifier(Identifier, MI)) + return; + } else { + // C99 6.10.3.4p2 says that a disabled macro may never again be + // expanded, even if it's in a context where it could be expanded in the + // future. + Identifier.setFlag(LexerToken::DisableExpand); + } + } + } else if (II.isOtherTargetMacro() && !DisableMacroExpansion) { + // If this identifier is a macro on some other target, emit a diagnostic. + // This diagnosic is only emitted when macro expansion is enabled, because + // the macro would not have been expanded for the other target either. + II.setIsOtherTargetMacro(false); // Don't warn on second use. + getTargetInfo().DiagnoseNonPortability(Identifier.getLocation(), + diag::port_target_macro_use); + + } + + // C++ 2.11p2: If this is an alternative representation of a C++ operator, + // then we act as if it is the actual operator and not the textual + // representation of it. + if (II.isCPlusPlusOperatorKeyword()) + Identifier.setIdentifierInfo(0); + + // Change the kind of this identifier to the appropriate token kind, e.g. + // turning "for" into a keyword. + Identifier.setKind(II.getTokenID()); + + // If this is an extension token, diagnose its use. + // FIXME: tried (unsuccesfully) to shut this up when compiling with gnu99 + // For now, I'm just commenting it out (while I work on attributes). + if (II.isExtensionToken() && Features.C99) + Diag(Identifier, diag::ext_token_used); +} + +/// HandleEndOfFile - This callback is invoked when the lexer hits the end of +/// the current file. This either returns the EOF token or pops a level off +/// the include stack and keeps going. +bool Preprocessor::HandleEndOfFile(LexerToken &Result, bool isEndOfMacro) { + assert(!CurMacroExpander && + "Ending a file when currently in a macro!"); + + // See if this file had a controlling macro. 
+ if (CurLexer) { // Not ending a macro, ignore it. + if (const IdentifierInfo *ControllingMacro = + CurLexer->MIOpt.GetControllingMacroAtEndOfFile()) { + // Okay, this has a controlling macro, remember in PerFileInfo. + if (const FileEntry *FE = + SourceMgr.getFileEntryForFileID(CurLexer->getCurFileID())) + HeaderInfo.SetFileControllingMacro(FE, ControllingMacro); + } + } + + // If this is a #include'd file, pop it off the include stack and continue + // lexing the #includer file. + if (!IncludeMacroStack.empty()) { + // We're done with the #included file. + RemoveTopOfLexerStack(); + + // Notify the client, if desired, that we are in a new source file. + if (Callbacks && !isEndOfMacro && CurLexer) { + DirectoryLookup::DirType FileType = DirectoryLookup::NormalHeaderDir; + + // Get the file entry for the current file. + if (const FileEntry *FE = + SourceMgr.getFileEntryForFileID(CurLexer->getCurFileID())) + FileType = HeaderInfo.getFileDirFlavor(FE); + + Callbacks->FileChanged(CurLexer->getSourceLocation(CurLexer->BufferPtr), + PPCallbacks::ExitFile, FileType); + } + + // Client should lex another token. + return false; + } + + Result.startToken(); + CurLexer->BufferPtr = CurLexer->BufferEnd; + CurLexer->FormTokenWithChars(Result, CurLexer->BufferEnd); + Result.setKind(tok::eof); + + // We're done with the #included file. + delete CurLexer; + CurLexer = 0; + + // This is the end of the top-level file. If the diag::pp_macro_not_used + // diagnostic is enabled, walk all of the identifiers, looking for macros that + // have not been used. 
+ if (Diags.getDiagnosticLevel(diag::pp_macro_not_used) != Diagnostic::Ignored){ + for (IdentifierTable::iterator I = Identifiers.begin(), + E = Identifiers.end(); I != E; ++I) { + const IdentifierInfo &II = I->getValue(); + if (II.getMacroInfo() && !II.getMacroInfo()->isUsed()) + Diag(II.getMacroInfo()->getDefinitionLoc(), diag::pp_macro_not_used); + } + } + + return true; +} + +/// HandleEndOfMacro - This callback is invoked when the lexer hits the end of +/// the current macro expansion or token stream expansion. +bool Preprocessor::HandleEndOfMacro(LexerToken &Result) { + assert(CurMacroExpander && !CurLexer && + "Ending a macro when currently in a #include file!"); + + delete CurMacroExpander; + + // Handle this like a #include file being popped off the stack. + CurMacroExpander = 0; + return HandleEndOfFile(Result, true); +} + + +//===----------------------------------------------------------------------===// +// Utility Methods for Preprocessor Directive Handling. +//===----------------------------------------------------------------------===// + +/// DiscardUntilEndOfDirective - Read and discard all tokens remaining on the +/// current line until the tok::eom token is found. +void Preprocessor::DiscardUntilEndOfDirective() { + LexerToken Tmp; + do { + LexUnexpandedToken(Tmp); + } while (Tmp.getKind() != tok::eom); +} + +/// isCXXNamedOperator - Returns "true" if the token is a named operator in C++. +static bool isCXXNamedOperator(const std::string &Spelling) { + return Spelling == "and" || Spelling == "bitand" || Spelling == "bitor" || + Spelling == "compl" || Spelling == "not" || Spelling == "not_eq" || + Spelling == "or" || Spelling == "xor"; +} + +/// ReadMacroName - Lex and validate a macro name, which occurs after a +/// #define or #undef. This sets the token kind to eom and discards the rest +/// of the macro line if the macro name is invalid. 
isDefineUndef is 1 if +/// this is due to a a #define, 2 if #undef directive, 0 if it is something +/// else (e.g. #ifdef). +void Preprocessor::ReadMacroName(LexerToken &MacroNameTok, char isDefineUndef) { + // Read the token, don't allow macro expansion on it. + LexUnexpandedToken(MacroNameTok); + + // Missing macro name? + if (MacroNameTok.getKind() == tok::eom) + return Diag(MacroNameTok, diag::err_pp_missing_macro_name); + + IdentifierInfo *II = MacroNameTok.getIdentifierInfo(); + if (II == 0) { + std::string Spelling = getSpelling(MacroNameTok); + if (isCXXNamedOperator(Spelling)) + // C++ 2.5p2: Alternative tokens behave the same as its primary token + // except for their spellings. + Diag(MacroNameTok, diag::err_pp_operator_used_as_macro_name, Spelling); + else + Diag(MacroNameTok, diag::err_pp_macro_not_identifier); + // Fall through on error. + } else if (isDefineUndef && II->getPPKeywordID() == tok::pp_defined) { + // Error if defining "defined": C99 6.10.8.4. + Diag(MacroNameTok, diag::err_defined_macro_name); + } else if (isDefineUndef && II->getMacroInfo() && + II->getMacroInfo()->isBuiltinMacro()) { + // Error if defining "__LINE__" and other builtins: C99 6.10.8.4. + if (isDefineUndef == 1) + Diag(MacroNameTok, diag::pp_redef_builtin_macro); + else + Diag(MacroNameTok, diag::pp_undef_builtin_macro); + } else { + // Okay, we got a good identifier node. Return it. + return; + } + + // Invalid macro name, read and discard the rest of the line. Then set the + // token kind to tok::eom. + MacroNameTok.setKind(tok::eom); + return DiscardUntilEndOfDirective(); +} + +/// CheckEndOfDirective - Ensure that the next token is a tok::eom token. If +/// not, emit a diagnostic and consume up until the eom. +void Preprocessor::CheckEndOfDirective(const char *DirType) { + LexerToken Tmp; + Lex(Tmp); + // There should be no tokens after the directive, but we allow them as an + // extension. + while (Tmp.getKind() == tok::comment) // Skip comments in -C mode. 
+ Lex(Tmp); + + if (Tmp.getKind() != tok::eom) { + Diag(Tmp, diag::ext_pp_extra_tokens_at_eol, DirType); + DiscardUntilEndOfDirective(); + } +} + + + +/// SkipExcludedConditionalBlock - We just read a #if or related directive and +/// decided that the subsequent tokens are in the #if'd out portion of the +/// file. Lex the rest of the file, until we see an #endif. If +/// FoundNonSkipPortion is true, then we have already emitted code for part of +/// this #if directive, so #else/#elif blocks should never be entered. If ElseOk +/// is true, then #else directives are ok, if not, then we have already seen one +/// so a #else directive is a duplicate. When this returns, the caller can lex +/// the first valid token. +void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, + bool FoundNonSkipPortion, + bool FoundElse) { + ++NumSkipped; + assert(CurMacroExpander == 0 && CurLexer && + "Lexing a macro, not a file?"); + + CurLexer->pushConditionalLevel(IfTokenLoc, /*isSkipping*/false, + FoundNonSkipPortion, FoundElse); + + // Enter raw mode to disable identifier lookup (and thus macro expansion), + // disabling warnings, etc. + CurLexer->LexingRawMode = true; + LexerToken Tok; + while (1) { + CurLexer->Lex(Tok); + + // If this is the end of the buffer, we have an error. + if (Tok.getKind() == tok::eof) { + // Emit errors for each unterminated conditional on the stack, including + // the current one. + while (!CurLexer->ConditionalStack.empty()) { + Diag(CurLexer->ConditionalStack.back().IfLoc, + diag::err_pp_unterminated_conditional); + CurLexer->ConditionalStack.pop_back(); + } + + // Just return and let the caller lex after this #include. + break; + } + + // If this token is not a preprocessor directive, just skip it. + if (Tok.getKind() != tok::hash || !Tok.isAtStartOfLine()) + continue; + + // We just parsed a # character at the start of a line, so we're in + // directive mode. 
Tell the lexer this so any newlines we see will be + // converted into an EOM token (this terminates the macro). + CurLexer->ParsingPreprocessorDirective = true; + CurLexer->KeepCommentMode = false; + + + // Read the next token, the directive flavor. + LexUnexpandedToken(Tok); + + // If this isn't an identifier directive (e.g. is "# 1\n" or "#\n", or + // something bogus), skip it. + if (Tok.getKind() != tok::identifier) { + CurLexer->ParsingPreprocessorDirective = false; + // Restore comment saving mode. + CurLexer->KeepCommentMode = KeepComments; + continue; + } + + // If the first letter isn't i or e, it isn't intesting to us. We know that + // this is safe in the face of spelling differences, because there is no way + // to spell an i/e in a strange way that is another letter. Skipping this + // allows us to avoid looking up the identifier info for #define/#undef and + // other common directives. + const char *RawCharData = SourceMgr.getCharacterData(Tok.getLocation()); + char FirstChar = RawCharData[0]; + if (FirstChar >= 'a' && FirstChar <= 'z' && + FirstChar != 'i' && FirstChar != 'e') { + CurLexer->ParsingPreprocessorDirective = false; + // Restore comment saving mode. + CurLexer->KeepCommentMode = KeepComments; + continue; + } + + // Get the identifier name without trigraphs or embedded newlines. Note + // that we can't use Tok.getIdentifierInfo() because its lookup is disabled + // when skipping. + // TODO: could do this with zero copies in the no-clean case by using + // strncmp below. + char Directive[20]; + unsigned IdLen; + if (!Tok.needsCleaning() && Tok.getLength() < 20) { + IdLen = Tok.getLength(); + memcpy(Directive, RawCharData, IdLen); + Directive[IdLen] = 0; + } else { + std::string DirectiveStr = getSpelling(Tok); + IdLen = DirectiveStr.size(); + if (IdLen >= 20) { + CurLexer->ParsingPreprocessorDirective = false; + // Restore comment saving mode. 
+ CurLexer->KeepCommentMode = KeepComments; + continue; + } + memcpy(Directive, &DirectiveStr[0], IdLen); + Directive[IdLen] = 0; + } + + if (FirstChar == 'i' && Directive[1] == 'f') { + if ((IdLen == 2) || // "if" + (IdLen == 5 && !strcmp(Directive+2, "def")) || // "ifdef" + (IdLen == 6 && !strcmp(Directive+2, "ndef"))) { // "ifndef" + // We know the entire #if/#ifdef/#ifndef block will be skipped, don't + // bother parsing the condition. + DiscardUntilEndOfDirective(); + CurLexer->pushConditionalLevel(Tok.getLocation(), /*wasskipping*/true, + /*foundnonskip*/false, + /*fnddelse*/false); + } + } else if (FirstChar == 'e') { + if (IdLen == 5 && !strcmp(Directive+1, "ndif")) { // "endif" + CheckEndOfDirective("#endif"); + PPConditionalInfo CondInfo; + CondInfo.WasSkipping = true; // Silence bogus warning. + bool InCond = CurLexer->popConditionalLevel(CondInfo); + InCond = InCond; // Silence warning in no-asserts mode. + assert(!InCond && "Can't be skipping if not in a conditional!"); + + // If we popped the outermost skipping block, we're done skipping! + if (!CondInfo.WasSkipping) + break; + } else if (IdLen == 4 && !strcmp(Directive+1, "lse")) { // "else". + // #else directive in a skipping conditional. If not in some other + // skipping conditional, and if #else hasn't already been seen, enter it + // as a non-skipping conditional. + CheckEndOfDirective("#else"); + PPConditionalInfo &CondInfo = CurLexer->peekConditionalLevel(); + + // If this is a #else with a #else before it, report the error. + if (CondInfo.FoundElse) Diag(Tok, diag::pp_err_else_after_else); + + // Note that we've seen a #else in this conditional. + CondInfo.FoundElse = true; + + // If the conditional is at the top level, and the #if block wasn't + // entered, enter the #else block now. + if (!CondInfo.WasSkipping && !CondInfo.FoundNonSkip) { + CondInfo.FoundNonSkip = true; + break; + } + } else if (IdLen == 4 && !strcmp(Directive+1, "lif")) { // "elif". 
+ PPConditionalInfo &CondInfo = CurLexer->peekConditionalLevel(); + + bool ShouldEnter; + // If this is in a skipping block or if we're already handled this #if + // block, don't bother parsing the condition. + if (CondInfo.WasSkipping || CondInfo.FoundNonSkip) { + DiscardUntilEndOfDirective(); + ShouldEnter = false; + } else { + // Restore the value of LexingRawMode so that identifiers are + // looked up, etc, inside the #elif expression. + assert(CurLexer->LexingRawMode && "We have to be skipping here!"); + CurLexer->LexingRawMode = false; + IdentifierInfo *IfNDefMacro = 0; + ShouldEnter = EvaluateDirectiveExpression(IfNDefMacro); + CurLexer->LexingRawMode = true; + } + + // If this is a #elif with a #else before it, report the error. + if (CondInfo.FoundElse) Diag(Tok, diag::pp_err_elif_after_else); + + // If this condition is true, enter it! + if (ShouldEnter) { + CondInfo.FoundNonSkip = true; + break; + } + } + } + + CurLexer->ParsingPreprocessorDirective = false; + // Restore comment saving mode. + CurLexer->KeepCommentMode = KeepComments; + } + + // Finally, if we are out of the conditional (saw an #endif or ran off the end + // of the file, just stop skipping and return to lexing whatever came after + // the #if block. + CurLexer->LexingRawMode = false; +} + +//===----------------------------------------------------------------------===// +// Preprocessor Directive Handling. +//===----------------------------------------------------------------------===// + +/// HandleDirective - This callback is invoked when the lexer sees a # token +/// at the start of a line. This consumes the directive, modifies the +/// lexer/preprocessor state, and advances the lexer(s) so that the next token +/// read is the correct one. +void Preprocessor::HandleDirective(LexerToken &Result) { + // FIXME: Traditional: # with whitespace before it not recognized by K&R? + + // We just parsed a # character at the start of a line, so we're in directive + // mode. 
Tell the lexer this so any newlines we see will be converted into an + // EOM token (which terminates the directive). + CurLexer->ParsingPreprocessorDirective = true; + + ++NumDirectives; + + // We are about to read a token. For the multiple-include optimization FA to + // work, we have to remember if we had read any tokens *before* this + // pp-directive. + bool ReadAnyTokensBeforeDirective = CurLexer->MIOpt.getHasReadAnyTokensVal(); + + // Read the next token, the directive flavor. This isn't expanded due to + // C99 6.10.3p8. + LexUnexpandedToken(Result); + + // C99 6.10.3p11: Is this preprocessor directive in macro invocation? e.g.: + // #define A(x) #x + // A(abc + // #warning blah + // def) + // If so, the user is relying on non-portable behavior, emit a diagnostic. + if (InMacroArgs) + Diag(Result, diag::ext_embedded_directive); + +TryAgain: + switch (Result.getKind()) { + case tok::eom: + return; // null directive. + case tok::comment: + // Handle stuff like "# /*foo*/ define X" in -E -C mode. + LexUnexpandedToken(Result); + goto TryAgain; + + case tok::numeric_constant: + // FIXME: implement # 7 line numbers! + DiscardUntilEndOfDirective(); + return; + default: + IdentifierInfo *II = Result.getIdentifierInfo(); + if (II == 0) break; // Not an identifier. + + // Ask what the preprocessor keyword ID is. + switch (II->getPPKeywordID()) { + default: break; + // C99 6.10.1 - Conditional Inclusion. + case tok::pp_if: + return HandleIfDirective(Result, ReadAnyTokensBeforeDirective); + case tok::pp_ifdef: + return HandleIfdefDirective(Result, false, true/*not valid for miopt*/); + case tok::pp_ifndef: + return HandleIfdefDirective(Result, true, ReadAnyTokensBeforeDirective); + case tok::pp_elif: + return HandleElifDirective(Result); + case tok::pp_else: + return HandleElseDirective(Result); + case tok::pp_endif: + return HandleEndifDirective(Result); + + // C99 6.10.2 - Source File Inclusion. 
+ case tok::pp_include: + return HandleIncludeDirective(Result); // Handle #include. + + // C99 6.10.3 - Macro Replacement. + case tok::pp_define: + return HandleDefineDirective(Result, false); + case tok::pp_undef: + return HandleUndefDirective(Result); + + // C99 6.10.4 - Line Control. + case tok::pp_line: + // FIXME: implement #line + DiscardUntilEndOfDirective(); + return; + + // C99 6.10.5 - Error Directive. + case tok::pp_error: + return HandleUserDiagnosticDirective(Result, false); + + // C99 6.10.6 - Pragma Directive. + case tok::pp_pragma: + return HandlePragmaDirective(); + + // GNU Extensions. + case tok::pp_import: + return HandleImportDirective(Result); + case tok::pp_include_next: + return HandleIncludeNextDirective(Result); + + case tok::pp_warning: + Diag(Result, diag::ext_pp_warning_directive); + return HandleUserDiagnosticDirective(Result, true); + case tok::pp_ident: + return HandleIdentSCCSDirective(Result); + case tok::pp_sccs: + return HandleIdentSCCSDirective(Result); + case tok::pp_assert: + //isExtension = true; // FIXME: implement #assert + break; + case tok::pp_unassert: + //isExtension = true; // FIXME: implement #unassert + break; + + // clang extensions. + case tok::pp_define_target: + return HandleDefineDirective(Result, true); + case tok::pp_define_other_target: + return HandleDefineOtherTargetDirective(Result); + } + break; + } + + // If we reached here, the preprocessing token is not valid! + Diag(Result, diag::err_pp_invalid_directive); + + // Read the rest of the PP line. + DiscardUntilEndOfDirective(); + + // Okay, we're done parsing the directive. +} + +void Preprocessor::HandleUserDiagnosticDirective(LexerToken &Tok, + bool isWarning) { + // Read the rest of the line raw. We do this because we don't want macros + // to be expanded and we don't require that the tokens be valid preprocessing + // tokens. For example, this is allowed: "#warning ` 'foo". 
GCC does + // collapse multiple consequtive white space between tokens, but this isn't + // specified by the standard. + std::string Message = CurLexer->ReadToEndOfLine(); + + unsigned DiagID = isWarning ? diag::pp_hash_warning : diag::err_pp_hash_error; + return Diag(Tok, DiagID, Message); +} + +/// HandleIdentSCCSDirective - Handle a #ident/#sccs directive. +/// +void Preprocessor::HandleIdentSCCSDirective(LexerToken &Tok) { + // Yes, this directive is an extension. + Diag(Tok, diag::ext_pp_ident_directive); + + // Read the string argument. + LexerToken StrTok; + Lex(StrTok); + + // If the token kind isn't a string, it's a malformed directive. + if (StrTok.getKind() != tok::string_literal && + StrTok.getKind() != tok::wide_string_literal) + return Diag(StrTok, diag::err_pp_malformed_ident); + + // Verify that there is nothing after the string, other than EOM. + CheckEndOfDirective("#ident"); + + if (Callbacks) + Callbacks->Ident(Tok.getLocation(), getSpelling(StrTok)); +} + +//===----------------------------------------------------------------------===// +// Preprocessor Include Directive Handling. +//===----------------------------------------------------------------------===// + +/// GetIncludeFilenameSpelling - Turn the specified lexer token into a fully +/// checked and spelled filename, e.g. as an operand of #include. This returns +/// true if the input filename was in <>'s or false if it were in ""'s. The +/// caller is expected to provide a buffer that is large enough to hold the +/// spelling of the filename, but is also expected to handle the case when +/// this method decides to use a different buffer. +bool Preprocessor::GetIncludeFilenameSpelling(const LexerToken &FilenameTok, + const char *&BufStart, + const char *&BufEnd) { + // Get the text form of the filename. 
+ unsigned Len = getSpelling(FilenameTok, BufStart); + BufEnd = BufStart+Len; + assert(BufStart != BufEnd && "Can't have tokens with empty spellings!"); + + // Make sure the filename is <x> or "x". + bool isAngled; + if (BufStart[0] == '<') { + if (BufEnd[-1] != '>') { + Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename); + BufStart = 0; + return true; + } + isAngled = true; + } else if (BufStart[0] == '"') { + if (BufEnd[-1] != '"') { + Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename); + BufStart = 0; + return true; + } + isAngled = false; + } else { + Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename); + BufStart = 0; + return true; + } + + // Diagnose #include "" as invalid. + if (BufEnd-BufStart <= 2) { + Diag(FilenameTok.getLocation(), diag::err_pp_empty_filename); + BufStart = 0; + return ""; + } + + // Skip the brackets. + ++BufStart; + --BufEnd; + return isAngled; +} + +/// HandleIncludeDirective - The "#include" tokens have just been read, read the +/// file to be included from the lexer, then include it! This is a common +/// routine with functionality shared between #include, #include_next and +/// #import. +void Preprocessor::HandleIncludeDirective(LexerToken &IncludeTok, + const DirectoryLookup *LookupFrom, + bool isImport) { + + LexerToken FilenameTok; + CurLexer->LexIncludeFilename(FilenameTok); + + // If the token kind is EOM, the error has already been diagnosed. + if (FilenameTok.getKind() == tok::eom) + return; + + // Reserve a buffer to get the spelling. + llvm::SmallVector<char, 128> FilenameBuffer; + FilenameBuffer.resize(FilenameTok.getLength()); + + const char *FilenameStart = &FilenameBuffer[0], *FilenameEnd; + bool isAngled = GetIncludeFilenameSpelling(FilenameTok, + FilenameStart, FilenameEnd); + // If GetIncludeFilenameSpelling set the start ptr to null, there was an + // error. + if (FilenameStart == 0) + return; + + // Verify that there is nothing after the filename, other than EOM. 
Use the + // preprocessor to lex this in case lexing the filename entered a macro. + CheckEndOfDirective("#include"); + + // Check that we don't have infinite #include recursion. + if (IncludeMacroStack.size() == MaxAllowedIncludeStackDepth-1) + return Diag(FilenameTok, diag::err_pp_include_too_deep); + + // Search include directories. + const DirectoryLookup *CurDir; + const FileEntry *File = LookupFile(FilenameStart, FilenameEnd, + isAngled, LookupFrom, CurDir); + if (File == 0) + return Diag(FilenameTok, diag::err_pp_file_not_found, + std::string(FilenameStart, FilenameEnd)); + + // Ask HeaderInfo if we should enter this #include file. + if (!HeaderInfo.ShouldEnterIncludeFile(File, isImport)) { + // If it returns true, #including this file will have no effect. + return; + } + + // Look up the file, create a File ID for it. + unsigned FileID = SourceMgr.createFileID(File, FilenameTok.getLocation()); + if (FileID == 0) + return Diag(FilenameTok, diag::err_pp_file_not_found, + std::string(FilenameStart, FilenameEnd)); + + // Finally, if all is good, enter the new file! + EnterSourceFile(FileID, CurDir); +} + +/// HandleIncludeNextDirective - Implements #include_next. +/// +void Preprocessor::HandleIncludeNextDirective(LexerToken &IncludeNextTok) { + Diag(IncludeNextTok, diag::ext_pp_include_next_directive); + + // #include_next is like #include, except that we start searching after + // the current found directory. If we can't do this, issue a + // diagnostic. + const DirectoryLookup *Lookup = CurDirLookup; + if (isInPrimaryFile()) { + Lookup = 0; + Diag(IncludeNextTok, diag::pp_include_next_in_primary); + } else if (Lookup == 0) { + Diag(IncludeNextTok, diag::pp_include_next_absolute_path); + } else { + // Start looking up in the next directory. + ++Lookup; + } + + return HandleIncludeDirective(IncludeNextTok, Lookup); +} + +/// HandleImportDirective - Implements #import. 
+/// +void Preprocessor::HandleImportDirective(LexerToken &ImportTok) { + Diag(ImportTok, diag::ext_pp_import_directive); + + return HandleIncludeDirective(ImportTok, 0, true); +} + +//===----------------------------------------------------------------------===// +// Preprocessor Macro Directive Handling. +//===----------------------------------------------------------------------===// + +/// ReadMacroDefinitionArgList - The ( starting an argument list of a macro +/// definition has just been read. Lex the rest of the arguments and the +/// closing ), updating MI with what we learn. Return true if an error occurs +/// parsing the arg list. +bool Preprocessor::ReadMacroDefinitionArgList(MacroInfo *MI) { + LexerToken Tok; + while (1) { + LexUnexpandedToken(Tok); + switch (Tok.getKind()) { + case tok::r_paren: + // Found the end of the argument list. + if (MI->arg_begin() == MI->arg_end()) return false; // #define FOO() + // Otherwise we have #define FOO(A,) + Diag(Tok, diag::err_pp_expected_ident_in_arg_list); + return true; + case tok::ellipsis: // #define X(... -> C99 varargs + // Warn if use of C99 feature in non-C99 mode. + if (!Features.C99) Diag(Tok, diag::ext_variadic_macro); + + // Lex the token after the identifier. + LexUnexpandedToken(Tok); + if (Tok.getKind() != tok::r_paren) { + Diag(Tok, diag::err_pp_missing_rparen_in_macro_def); + return true; + } + // Add the __VA_ARGS__ identifier as an argument. + MI->addArgument(Ident__VA_ARGS__); + MI->setIsC99Varargs(); + return false; + case tok::eom: // #define X( + Diag(Tok, diag::err_pp_missing_rparen_in_macro_def); + return true; + default: + // Handle keywords and identifiers here to accept things like + // #define Foo(for) for. + IdentifierInfo *II = Tok.getIdentifierInfo(); + if (II == 0) { + // #define X(1 + Diag(Tok, diag::err_pp_invalid_tok_in_arg_list); + return true; + } + + // If this is already used as an argument, it is used multiple times (e.g. + // #define X(A,A. 
+ if (MI->getArgumentNum(II) != -1) { // C99 6.10.3p6 + Diag(Tok, diag::err_pp_duplicate_name_in_arg_list, II->getName()); + return true; + } + + // Add the argument to the macro info. + MI->addArgument(II); + + // Lex the token after the identifier. + LexUnexpandedToken(Tok); + + switch (Tok.getKind()) { + default: // #define X(A B + Diag(Tok, diag::err_pp_expected_comma_in_arg_list); + return true; + case tok::r_paren: // #define X(A) + return false; + case tok::comma: // #define X(A, + break; + case tok::ellipsis: // #define X(A... -> GCC extension + // Diagnose extension. + Diag(Tok, diag::ext_named_variadic_macro); + + // Lex the token after the identifier. + LexUnexpandedToken(Tok); + if (Tok.getKind() != tok::r_paren) { + Diag(Tok, diag::err_pp_missing_rparen_in_macro_def); + return true; + } + + MI->setIsGNUVarargs(); + return false; + } + } + } +} + +/// HandleDefineDirective - Implements #define. This consumes the entire macro +/// line then lets the caller lex the next real token. If 'isTargetSpecific' is +/// true, then this is a "#define_target", otherwise this is a "#define". +/// +void Preprocessor::HandleDefineDirective(LexerToken &DefineTok, + bool isTargetSpecific) { + ++NumDefined; + + LexerToken MacroNameTok; + ReadMacroName(MacroNameTok, 1); + + // Error reading macro name? If so, diagnostic already issued. + if (MacroNameTok.getKind() == tok::eom) + return; + + // If we are supposed to keep comments in #defines, reenable comment saving + // mode. + CurLexer->KeepCommentMode = KeepMacroComments; + + // Create the new macro. + MacroInfo *MI = new MacroInfo(MacroNameTok.getLocation()); + if (isTargetSpecific) MI->setIsTargetSpecific(); + + // If the identifier is an 'other target' macro, clear this bit. 
+ MacroNameTok.getIdentifierInfo()->setIsOtherTargetMacro(false); + + + LexerToken Tok; + LexUnexpandedToken(Tok); + + // If this is a function-like macro definition, parse the argument list, + // marking each of the identifiers as being used as macro arguments. Also, + // check other constraints on the first token of the macro body. + if (Tok.getKind() == tok::eom) { + // If there is no body to this macro, we have no special handling here. + } else if (Tok.getKind() == tok::l_paren && !Tok.hasLeadingSpace()) { + // This is a function-like macro definition. Read the argument list. + MI->setIsFunctionLike(); + if (ReadMacroDefinitionArgList(MI)) { + // Forget about MI. + delete MI; + // Throw away the rest of the line. + if (CurLexer->ParsingPreprocessorDirective) + DiscardUntilEndOfDirective(); + return; + } + + // Read the first token after the arg list for down below. + LexUnexpandedToken(Tok); + } else if (!Tok.hasLeadingSpace()) { + // C99 requires whitespace between the macro definition and the body. Emit + // a diagnostic for something like "#define X+". + if (Features.C99) { + Diag(Tok, diag::ext_c99_whitespace_required_after_macro_name); + } else { + // FIXME: C90/C++ do not get this diagnostic, but it does get a similar + // one in some cases! + } + } else { + // This is a normal token with leading space. Clear the leading space + // marker on the first token to get proper expansion. + Tok.clearFlag(LexerToken::LeadingSpace); + } + + // If this is a definition of a variadic C99 function-like macro, not using + // the GNU named varargs extension, enabled __VA_ARGS__. + + // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro. + // This gets unpoisoned where it is allowed. + assert(Ident__VA_ARGS__->isPoisoned() && "__VA_ARGS__ should be poisoned!"); + if (MI->isC99Varargs()) + Ident__VA_ARGS__->setIsPoisoned(false); + + // Read the rest of the macro body. 
+ while (Tok.getKind() != tok::eom) { + MI->AddTokenToBody(Tok); + + // Check C99 6.10.3.2p1: ensure that # operators are followed by macro + // parameters in function-like macro expansions. + if (Tok.getKind() != tok::hash || MI->isObjectLike()) { + // Get the next token of the macro. + LexUnexpandedToken(Tok); + continue; + } + + // Get the next token of the macro. + LexUnexpandedToken(Tok); + + // Not a macro arg identifier? + if (!Tok.getIdentifierInfo() || + MI->getArgumentNum(Tok.getIdentifierInfo()) == -1) { + Diag(Tok, diag::err_pp_stringize_not_parameter); + delete MI; + + // Disable __VA_ARGS__ again. + Ident__VA_ARGS__->setIsPoisoned(true); + return; + } + + // Things look ok, add the param name token to the macro. + MI->AddTokenToBody(Tok); + + // Get the next token of the macro. + LexUnexpandedToken(Tok); + } + + // Disable __VA_ARGS__ again. + Ident__VA_ARGS__->setIsPoisoned(true); + + // Check that there is no paste (##) operator at the begining or end of the + // replacement list. + unsigned NumTokens = MI->getNumTokens(); + if (NumTokens != 0) { + if (MI->getReplacementToken(0).getKind() == tok::hashhash) { + Diag(MI->getReplacementToken(0), diag::err_paste_at_start); + delete MI; + return; + } + if (MI->getReplacementToken(NumTokens-1).getKind() == tok::hashhash) { + Diag(MI->getReplacementToken(NumTokens-1), diag::err_paste_at_end); + delete MI; + return; + } + } + + // If this is the primary source file, remember that this macro hasn't been + // used yet. + if (isInPrimaryFile()) + MI->setIsUsed(false); + + // Finally, if this identifier already had a macro defined for it, verify that + // the macro bodies are identical and free the old definition. + if (MacroInfo *OtherMI = MacroNameTok.getIdentifierInfo()->getMacroInfo()) { + if (!OtherMI->isUsed()) + Diag(OtherMI->getDefinitionLoc(), diag::pp_macro_not_used); + + // Macros must be identical. This means all tokes and whitespace separation + // must be the same. C99 6.10.3.2. 
+ if (!MI->isIdenticalTo(*OtherMI, *this)) { + Diag(MI->getDefinitionLoc(), diag::ext_pp_macro_redef, + MacroNameTok.getIdentifierInfo()->getName()); + Diag(OtherMI->getDefinitionLoc(), diag::ext_pp_macro_redef2); + } + delete OtherMI; + } + + MacroNameTok.getIdentifierInfo()->setMacroInfo(MI); +} + +/// HandleDefineOtherTargetDirective - Implements #define_other_target. +void Preprocessor::HandleDefineOtherTargetDirective(LexerToken &Tok) { + LexerToken MacroNameTok; + ReadMacroName(MacroNameTok, 1); + + // Error reading macro name? If so, diagnostic already issued. + if (MacroNameTok.getKind() == tok::eom) + return; + + // Check to see if this is the last token on the #undef line. + CheckEndOfDirective("#define_other_target"); + + // If there is already a macro defined by this name, turn it into a + // target-specific define. + if (MacroInfo *MI = MacroNameTok.getIdentifierInfo()->getMacroInfo()) { + MI->setIsTargetSpecific(true); + return; + } + + // Mark the identifier as being a macro on some other target. + MacroNameTok.getIdentifierInfo()->setIsOtherTargetMacro(); +} + + +/// HandleUndefDirective - Implements #undef. +/// +void Preprocessor::HandleUndefDirective(LexerToken &UndefTok) { + ++NumUndefined; + + LexerToken MacroNameTok; + ReadMacroName(MacroNameTok, 2); + + // Error reading macro name? If so, diagnostic already issued. + if (MacroNameTok.getKind() == tok::eom) + return; + + // Check to see if this is the last token on the #undef line. + CheckEndOfDirective("#undef"); + + // Okay, we finally have a valid identifier to undef. + MacroInfo *MI = MacroNameTok.getIdentifierInfo()->getMacroInfo(); + + // #undef untaints an identifier if it were marked by define_other_target. + MacroNameTok.getIdentifierInfo()->setIsOtherTargetMacro(false); + + // If the macro is not defined, this is a noop undef, just return. + if (MI == 0) return; + + if (!MI->isUsed()) + Diag(MI->getDefinitionLoc(), diag::pp_macro_not_used); + + // Free macro definition. 
+ delete MI; + MacroNameTok.getIdentifierInfo()->setMacroInfo(0); +} + + +//===----------------------------------------------------------------------===// +// Preprocessor Conditional Directive Handling. +//===----------------------------------------------------------------------===// + +/// HandleIfdefDirective - Implements the #ifdef/#ifndef directive. isIfndef is +/// true when this is a #ifndef directive. ReadAnyTokensBeforeDirective is true +/// if any tokens have been returned or pp-directives activated before this +/// #ifndef has been lexed. +/// +void Preprocessor::HandleIfdefDirective(LexerToken &Result, bool isIfndef, + bool ReadAnyTokensBeforeDirective) { + ++NumIf; + LexerToken DirectiveTok = Result; + + LexerToken MacroNameTok; + ReadMacroName(MacroNameTok); + + // Error reading macro name? If so, diagnostic already issued. + if (MacroNameTok.getKind() == tok::eom) + return; + + // Check to see if this is the last token on the #if[n]def line. + CheckEndOfDirective(isIfndef ? "#ifndef" : "#ifdef"); + + // If the start of a top-level #ifdef, inform MIOpt. + if (!ReadAnyTokensBeforeDirective && + CurLexer->getConditionalStackDepth() == 0) { + assert(isIfndef && "#ifdef shouldn't reach here"); + CurLexer->MIOpt.EnterTopLevelIFNDEF(MacroNameTok.getIdentifierInfo()); + } + + IdentifierInfo *MII = MacroNameTok.getIdentifierInfo(); + MacroInfo *MI = MII->getMacroInfo(); + + // If there is a macro, process it. + if (MI) { + // Mark it used. + MI->setIsUsed(true); + + // If this is the first use of a target-specific macro, warn about it. + if (MI->isTargetSpecific()) { + MI->setIsTargetSpecific(false); // Don't warn on second use. + getTargetInfo().DiagnoseNonPortability(MacroNameTok.getLocation(), + diag::port_target_macro_use); + } + } else { + // Use of a target-specific macro for some other target? If so, warn. + if (MII->isOtherTargetMacro()) { + MII->setIsOtherTargetMacro(false); // Don't warn on second use. 
+ getTargetInfo().DiagnoseNonPortability(MacroNameTok.getLocation(), + diag::port_target_macro_use); + } + } + + // Should we include the stuff contained by this directive? + if (!MI == isIfndef) { + // Yes, remember that we are inside a conditional, then lex the next token. + CurLexer->pushConditionalLevel(DirectiveTok.getLocation(), /*wasskip*/false, + /*foundnonskip*/true, /*foundelse*/false); + } else { + // No, skip the contents of this block and return the first token after it. + SkipExcludedConditionalBlock(DirectiveTok.getLocation(), + /*Foundnonskip*/false, + /*FoundElse*/false); + } +} + +/// HandleIfDirective - Implements the #if directive. +/// +void Preprocessor::HandleIfDirective(LexerToken &IfToken, + bool ReadAnyTokensBeforeDirective) { + ++NumIf; + + // Parse and evaluation the conditional expression. + IdentifierInfo *IfNDefMacro = 0; + bool ConditionalTrue = EvaluateDirectiveExpression(IfNDefMacro); + + // Should we include the stuff contained by this directive? + if (ConditionalTrue) { + // If this condition is equivalent to #ifndef X, and if this is the first + // directive seen, handle it for the multiple-include optimization. + if (!ReadAnyTokensBeforeDirective && + CurLexer->getConditionalStackDepth() == 0 && IfNDefMacro) + CurLexer->MIOpt.EnterTopLevelIFNDEF(IfNDefMacro); + + // Yes, remember that we are inside a conditional, then lex the next token. + CurLexer->pushConditionalLevel(IfToken.getLocation(), /*wasskip*/false, + /*foundnonskip*/true, /*foundelse*/false); + } else { + // No, skip the contents of this block and return the first token after it. + SkipExcludedConditionalBlock(IfToken.getLocation(), /*Foundnonskip*/false, + /*FoundElse*/false); + } +} + +/// HandleEndifDirective - Implements the #endif directive. +/// +void Preprocessor::HandleEndifDirective(LexerToken &EndifToken) { + ++NumEndif; + + // Check that this is the whole directive. 
+ CheckEndOfDirective("#endif"); + + PPConditionalInfo CondInfo; + if (CurLexer->popConditionalLevel(CondInfo)) { + // No conditionals on the stack: this is an #endif without an #if. + return Diag(EndifToken, diag::err_pp_endif_without_if); + } + + // If this the end of a top-level #endif, inform MIOpt. + if (CurLexer->getConditionalStackDepth() == 0) + CurLexer->MIOpt.ExitTopLevelConditional(); + + assert(!CondInfo.WasSkipping && !CurLexer->LexingRawMode && + "This code should only be reachable in the non-skipping case!"); +} + + +void Preprocessor::HandleElseDirective(LexerToken &Result) { + ++NumElse; + + // #else directive in a non-skipping conditional... start skipping. + CheckEndOfDirective("#else"); + + PPConditionalInfo CI; + if (CurLexer->popConditionalLevel(CI)) + return Diag(Result, diag::pp_err_else_without_if); + + // If this is a top-level #else, inform the MIOpt. + if (CurLexer->getConditionalStackDepth() == 0) + CurLexer->MIOpt.FoundTopLevelElse(); + + // If this is a #else with a #else before it, report the error. + if (CI.FoundElse) Diag(Result, diag::pp_err_else_after_else); + + // Finally, skip the rest of the contents of this block and return the first + // token after it. + return SkipExcludedConditionalBlock(CI.IfLoc, /*Foundnonskip*/true, + /*FoundElse*/true); +} + +void Preprocessor::HandleElifDirective(LexerToken &ElifToken) { + ++NumElse; + + // #elif directive in a non-skipping conditional... start skipping. + // We don't care what the condition is, because we will always skip it (since + // the block immediately before it was included). + DiscardUntilEndOfDirective(); + + PPConditionalInfo CI; + if (CurLexer->popConditionalLevel(CI)) + return Diag(ElifToken, diag::pp_err_elif_without_if); + + // If this is a top-level #elif, inform the MIOpt. + if (CurLexer->getConditionalStackDepth() == 0) + CurLexer->MIOpt.FoundTopLevelElse(); + + // If this is a #elif with a #else before it, report the error. 
+ if (CI.FoundElse) Diag(ElifToken, diag::pp_err_elif_after_else); + + // Finally, skip the rest of the contents of this block and return the first + // token after it. + return SkipExcludedConditionalBlock(CI.IfLoc, /*Foundnonskip*/true, + /*FoundElse*/CI.FoundElse); +} + diff --git a/Lex/ScratchBuffer.cpp b/Lex/ScratchBuffer.cpp new file mode 100644 index 0000000000..12cb0965ce --- /dev/null +++ b/Lex/ScratchBuffer.cpp @@ -0,0 +1,71 @@ +//===--- ScratchBuffer.cpp - Scratch space for forming tokens -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ScratchBuffer interface. +// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/ScratchBuffer.h" +#include "clang/Basic/SourceManager.h" +#include "llvm/Support/MemoryBuffer.h" +using namespace clang; + +// ScratchBufSize - The size of each chunk of scratch memory. Slightly less +//than a page, almost certainly enough for anything. :) +static const unsigned ScratchBufSize = 4060; + +ScratchBuffer::ScratchBuffer(SourceManager &SM) : SourceMgr(SM), CurBuffer(0) { + // Set BytesUsed so that the first call to getToken will require an alloc. + BytesUsed = ScratchBufSize; + FileID = 0; +} + +/// getToken - Splat the specified text into a temporary MemoryBuffer and +/// return a SourceLocation that refers to the token. This is just like the +/// method below, but returns a location that indicates the physloc of the +/// token. +SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len) { + if (BytesUsed+Len > ScratchBufSize) + AllocScratchBuffer(Len); + + // Copy the token data into the buffer. + memcpy(CurBuffer+BytesUsed, Buf, Len); + + // Remember that we used these bytes. 
+ BytesUsed += Len; + + assert(BytesUsed-Len < (1 << SourceLocation::FilePosBits) && + "Out of range file position!"); + + return SourceLocation(FileID, BytesUsed-Len); +} + + +/// getToken - Splat the specified text into a temporary MemoryBuffer and +/// return a SourceLocation that refers to the token. The SourceLoc value +/// gives a virtual location that the token will appear to be from. +SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len, + SourceLocation SourceLoc) { + // Map the physloc to the specified sourceloc. + return SourceMgr.getInstantiationLoc(getToken(Buf, Len), SourceLoc); +} + +void ScratchBuffer::AllocScratchBuffer(unsigned RequestLen) { + // Only pay attention to the requested length if it is larger than our default + // page size. If it is, we allocate an entire chunk for it. This is to + // support gigantic tokens, which almost certainly won't happen. :) + if (RequestLen < ScratchBufSize) + RequestLen = ScratchBufSize; + + llvm::MemoryBuffer *Buf = + llvm::MemoryBuffer::getNewMemBuffer(RequestLen, "<scratch space>"); + FileID = SourceMgr.createFileIDForMemBuffer(Buf); + CurBuffer = const_cast<char*>(Buf->getBufferStart()); + BytesUsed = 0; +} diff --git a/Makefile b/Makefile new file mode 100644 index 0000000000..623c0ffde0 --- /dev/null +++ b/Makefile @@ -0,0 +1,11 @@ +LEVEL = ../.. 
+DIRS := Basic Lex Parse AST Sema CodeGen Driver + +include $(LEVEL)/Makefile.common + +test:: + cd test; $(MAKE) + +clean:: + @rm -rf build + @rm -rf `find test -name Output` diff --git a/ModuleInfo.txt b/ModuleInfo.txt deleted file mode 100644 index 4368ef067a..0000000000 --- a/ModuleInfo.txt +++ /dev/null @@ -1,5 +0,0 @@ -# This file provides information for llvm-top -DepModule: llvm -ConfigCmd: -ConfigTest: -BuildCmd: diff --git a/NOTES.txt b/NOTES.txt new file mode 100644 index 0000000000..da8421112b --- /dev/null +++ b/NOTES.txt @@ -0,0 +1,218 @@ +//===---------------------------------------------------------------------===// +// Random Notes +//===---------------------------------------------------------------------===// + +C90/C99/C++ Comparisons: +http://david.tribble.com/text/cdiffs.htm + +//===---------------------------------------------------------------------===// +Extensions: + + * "#define_target X Y" + This preprocessor directive works exactly the same way as #define, but it + notes that 'X' is a target-specific preprocessor directive. When used, a + diagnostic is emitted indicating that the translation unit is non-portable. + + If a target-define is #undef'd before use, no diagnostic is emitted. If 'X' + were previously a normal #define macro, the macro is tainted. If 'X' is + subsequently #defined as a non-target-specific define, the taint bit is + cleared. + + * "#define_other_target X" + The preprocessor directive takes a single identifier argument. It notes + that this identifier is a target-specific #define for some target other than + the current one. Use of this identifier will result in a diagnostic. + + If 'X' is later #undef'd or #define'd, the taint bit is cleared. If 'X' is + already defined, X is marked as a target-specific define. + +//===---------------------------------------------------------------------===// + +To time GCC preprocessing speed without output, use: + "time gcc -MM file" +This is similar to -Eonly. 
+ + +//===---------------------------------------------------------------------===// + + C++ Template Instantiation benchmark: + http://users.rcn.com/abrahams/instantiation_speed/index.html + +//===---------------------------------------------------------------------===// + +TODO: File Manager Speedup: + + We currently do a lot of stat'ing for files that don't exist, particularly + when lots of -I paths exist (e.g. see the <iostream> example, check for + failures in stat in FileManager::getFile). It would be far better to make + the following changes: + 1. FileEntry contains a sys::Path instead of a std::string for Name. + 2. sys::Path contains timestamp and size, lazily computed. Eliminate from + FileEntry. + 3. File UIDs are created on request, not when files are opened. + These changes make it possible to efficiently have FileEntry objects for + files that exist on the file system, but have not been used yet. + + Once this is done: + 1. DirectoryEntry gets a boolean value "has read entries". When false, not + all entries in the directory are in the file mgr, when true, they are. + 2. Instead of stat'ing the file in FileManager::getFile, check to see if + the dir has been read. If so, fail immediately, if not, read the dir, + then retry. + 3. Reading the dir uses the getdirentries syscall, creating a FileEntry + for all files found. + +//===---------------------------------------------------------------------===// + +TODO: Fast #Import: + + * Get frameworks that don't use #import to do so, e.g. + DirectoryService, AudioToolbox, CoreFoundation, etc. Why aren't they using #import? + Because they work in C mode? C has #import. + * Have the lexer return a token for #import instead of handling it itself. + - Create a new preprocessor object with no external state (no -D/U options + from the command line, etc). Alternatively, keep track of exactly which + external state is used by a #import: declare it somehow. 
+ * When reading a #import file, keep track of whether we have seen any (and/or + which) "configuration" macros. Various cases: + - Uses of target args (__POWERPC__, __i386): Header has to be parsed + multiple times, per-target. What about #ifndef checks? How do we know? + - "Configuration" preprocessor macros not defined: POWERPC, etc. What about + things like __STDC__ etc? What is and what isn't allowed. + * Special handling for "umbrella" headers, which just contain #import stmts: + - Cocoa.h/AppKit.h - Contain pointers to digests instead of entire digests + themselves? Foundation.h isn't pure umbrella! + * Frameworks digests: + - Can put "digest" of a framework-worth of headers into the framework + itself. To open AppKit, just mmap + /System/Library/Frameworks/AppKit.framework/"digest", which provides a + symbol table in a well defined format. Lazily unstream stuff that is + needed. Contains declarations, macros, and debug information. + - System frameworks ship with digests. How do we handle configuration + information? How do we handle stuff like: + #if MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_2 + which guards a bunch of decls? Should there be a couple of default + configs, then have the UI fall back to building/caching its own? + - GUI automatically builds digests when UI is idle, both of system + frameworks if they aren't available in the right config, and of app + frameworks. + - GUI builds dependence graph of frameworks/digests based on #imports. If a + digest is out of date, dependent digests are automatically invalidated. + + * New constraints on #import for objc-v3: + - #imported file must not define non-inline function bodies. + - Alternatively, they can, and these bodies get compiled/linked *once* + per app into a dylib. What about building user dylibs? + - Restrictions on ObjC grammar: can't #import the body of a for stmt or fn. + - Compiler must detect and reject these cases. 
+ - #defines defined within a #import have two behaviors: + - By default, they escape the header. These macros *cannot* be #undef'd + by other code: this is enforced by the front-end. + - Optionally, user can specify what macros escape (whitelist) or can use + #undef. + +//===---------------------------------------------------------------------===// + +TODO: New language feature: Configuration queries: + - Instead of #ifdef __POWERPC__, use "if (strcmp(`cpu`, __POWERPC__))", or + some other, better, syntax. + - Use it to increase the number of "architecture-clean" #import'd files, + allowing a single index to be used for all fat slices. + +//===---------------------------------------------------------------------===// + +The 'portability' model in clang is sufficient to catch translation units (or +their parts) that are not portable, but it doesn't help if the system headers +are non-portable and not fixed. An alternative model that would be easy to use +is a 'tainting' scheme. Consider: + +int32_t +OSHostByteOrder(void) { +#if defined(__LITTLE_ENDIAN__) + return OSLittleEndian; +#elif defined(__BIG_ENDIAN__) + return OSBigEndian; +#else + return OSUnknownByteOrder; +#endif +} + +It would be trivial to mark 'OSHostByteOrder' as being non-portable (tainted) +instead of marking the entire translation unit. Then, if OSHostByteOrder is +never called/used by the current translation unit, the t-u wouldn't be marked +non-portable. However, there is no good way to handle stuff like: + +extern int X, Y; + +#ifndef __POWERPC__ +#define X Y +#endif + +int bar() { return X; } + +When compiling for powerpc, the #define is skipped, so it doesn't know that bar +uses a #define that is set on some other target. In practice, limited cases +could be handled by scanning the skipped region of a #if, but the fully general +case cannot be implemented efficiently. 
In this case, for example, the #define +in the protected region could be turned into either a #define_target or +#define_other_target as appropriate. The harder case is code like this (from +OSByteOrder.h): + + #if (defined(__ppc__) || defined(__ppc64__)) + #include <libkern/ppc/OSByteOrder.h> + #elif (defined(__i386__) || defined(__x86_64__)) + #include <libkern/i386/OSByteOrder.h> + #else + #include <libkern/machine/OSByteOrder.h> + #endif + +The realistic way to fix this is by having an initial #ifdef __llvm__ that +defines its contents in terms of the llvm bswap intrinsics. Other things should +be handled on a case-by-case basis. + + +We probably have to do something smarter like this in the future. The C++ header +<limits> contains a lot of code like this: + + static const int digits10 = __LDBL_DIG__; + static const int min_exponent = __LDBL_MIN_EXP__; + static const int min_exponent10 = __LDBL_MIN_10_EXP__; + static const float_denorm_style has_denorm + = bool(__LDBL_DENORM_MIN__) ? denorm_present : denorm_absent; + + ... since this isn't being used in an #ifdef, it should be easy enough to taint +the decl for these ivars. 
+ + +/usr/include/sys/cdefs.h contains stuff like this: + +#if defined(__ppc__) +# if defined(__LDBL_MANT_DIG__) && defined(__DBL_MANT_DIG__) && \ + __LDBL_MANT_DIG__ > __DBL_MANT_DIG__ +# if __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__-0 < 1040 +# define __DARWIN_LDBL_COMPAT(x) __asm("_" __STRING(x) "$LDBLStub") +# else +# define __DARWIN_LDBL_COMPAT(x) __asm("_" __STRING(x) "$LDBL128") +# endif +# define __DARWIN_LDBL_COMPAT2(x) __asm("_" __STRING(x) "$LDBL128") +# define __DARWIN_LONG_DOUBLE_IS_DOUBLE 0 +# else +# define __DARWIN_LDBL_COMPAT(x) /* nothing */ +# define __DARWIN_LDBL_COMPAT2(x) /* nothing */ +# define __DARWIN_LONG_DOUBLE_IS_DOUBLE 1 +# endif +#elif defined(__i386__) || defined(__ppc64__) || defined(__x86_64__) +# define __DARWIN_LDBL_COMPAT(x) /* nothing */ +# define __DARWIN_LDBL_COMPAT2(x) /* nothing */ +# define __DARWIN_LONG_DOUBLE_IS_DOUBLE 0 +#else +# error Unknown architecture +#endif + +An ideal way to solve this issue is to mark __DARWIN_LDBL_COMPAT / +__DARWIN_LDBL_COMPAT2 / __DARWIN_LONG_DOUBLE_IS_DOUBLE as being non-portable +because they depend on non-portable macros. In practice though, this may end +up being a serious problem: every use of printf will mark the translation unit +non-portable if targeting ppc32 and something else. + +//===---------------------------------------------------------------------===// diff --git a/Parse/AttributeList.cpp b/Parse/AttributeList.cpp new file mode 100644 index 0000000000..385dc03490 --- /dev/null +++ b/Parse/AttributeList.cpp @@ -0,0 +1,27 @@ +//===--- AttributeList.cpp --------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Steve Naroff and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines the AttributeList class implementation +// +//===----------------------------------------------------------------------===// + +#include "clang/Parse/AttributeList.h" +#include "clang/Lex/IdentifierTable.h" +using namespace clang; + +AttributeList::AttributeList(IdentifierInfo *aName, SourceLocation aLoc, + IdentifierInfo *pName, SourceLocation pLoc, + Action::ExprTy **elist, unsigned numargs, + AttributeList *n) + : AttrName(aName), AttrLoc(aLoc), ParmName(pName), ParmLoc(pLoc), + NumArgs(numargs), Next(n) { + Args = new Action::ExprTy*[numargs]; + for (unsigned i = 0; i != numargs; ++i) + Args[i] = elist[i]; +} diff --git a/Parse/DeclSpec.cpp b/Parse/DeclSpec.cpp new file mode 100644 index 0000000000..117ff12e59 --- /dev/null +++ b/Parse/DeclSpec.cpp @@ -0,0 +1,285 @@ +//===--- SemaDeclSpec.cpp - Declaration Specifier Semantic Analysis -------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements semantic analysis for declaration specifiers. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/Parse/DeclSpec.h" +#include "clang/Basic/LangOptions.h" +#include "clang/Basic/SourceLocation.h" +using namespace clang; + +/// getParsedSpecifiers - Return a bitmask of which flavors of specifiers this +/// +unsigned DeclSpec::getParsedSpecifiers() const { + unsigned Res = 0; + if (StorageClassSpec != SCS_unspecified || + SCS_thread_specified) + Res |= PQ_StorageClassSpecifier; + + if (TypeQualifiers != TQ_unspecified) + Res |= PQ_TypeQualifier; + + if (hasTypeSpecifier()) + Res |= PQ_TypeSpecifier; + + if (FS_inline_specified) + Res |= PQ_FunctionSpecifier; + return Res; +} + +const char *DeclSpec::getSpecifierName(DeclSpec::SCS S) { + switch (S) { + default: assert(0 && "Unknown typespec!"); + case DeclSpec::SCS_unspecified: return "unspecified"; + case DeclSpec::SCS_typedef: return "typedef"; + case DeclSpec::SCS_extern: return "extern"; + case DeclSpec::SCS_static: return "static"; + case DeclSpec::SCS_auto: return "auto"; + case DeclSpec::SCS_register: return "register"; + } +} + +static bool BadSpecifier(DeclSpec::SCS S, const char *&PrevSpec) { + PrevSpec = DeclSpec::getSpecifierName(S); + return true; +} + +static bool BadSpecifier(DeclSpec::TSW W, const char *&PrevSpec) { + switch (W) { + case DeclSpec::TSW_unspecified: PrevSpec = "unspecified"; break; + case DeclSpec::TSW_short: PrevSpec = "short"; break; + case DeclSpec::TSW_long: PrevSpec = "long"; break; + case DeclSpec::TSW_longlong: PrevSpec = "long long"; break; + } + return true; +} + +static bool BadSpecifier(DeclSpec::TSC C, const char *&PrevSpec) { + switch (C) { + case DeclSpec::TSC_unspecified: PrevSpec = "unspecified"; break; + case DeclSpec::TSC_imaginary: PrevSpec = "imaginary"; break; + case DeclSpec::TSC_complex: PrevSpec = "complex"; break; + } + return true; +} + + +static bool BadSpecifier(DeclSpec::TSS S, const char *&PrevSpec) { + switch (S) { + case DeclSpec::TSS_unspecified: 
PrevSpec = "unspecified"; break; + case DeclSpec::TSS_signed: PrevSpec = "signed"; break; + case DeclSpec::TSS_unsigned: PrevSpec = "unsigned"; break; + } + return true; +} + +const char *DeclSpec::getSpecifierName(DeclSpec::TST T) { + switch (T) { + default: assert(0 && "Unknown typespec!"); + case DeclSpec::TST_unspecified: return "unspecified"; + case DeclSpec::TST_void: return "void"; + case DeclSpec::TST_char: return "char"; + case DeclSpec::TST_int: return "int"; + case DeclSpec::TST_float: return "float"; + case DeclSpec::TST_double: return "double"; + case DeclSpec::TST_bool: return "_Bool"; + case DeclSpec::TST_decimal32: return "_Decimal32"; + case DeclSpec::TST_decimal64: return "_Decimal64"; + case DeclSpec::TST_decimal128: return "_Decimal128"; + case DeclSpec::TST_enum: return "enum"; + case DeclSpec::TST_union: return "union"; + case DeclSpec::TST_struct: return "struct"; + case DeclSpec::TST_typedef: return "typedef"; + } +} + +static bool BadSpecifier(DeclSpec::TST T, const char *&PrevSpec) { + PrevSpec = DeclSpec::getSpecifierName(T); + return true; +} + +static bool BadSpecifier(DeclSpec::TQ T, const char *&PrevSpec) { + switch (T) { + case DeclSpec::TQ_unspecified: PrevSpec = "unspecified"; break; + case DeclSpec::TQ_const: PrevSpec = "const"; break; + case DeclSpec::TQ_restrict: PrevSpec = "restrict"; break; + case DeclSpec::TQ_volatile: PrevSpec = "volatile"; break; + } + return true; +} + +bool DeclSpec::SetStorageClassSpec(SCS S, SourceLocation Loc, + const char *&PrevSpec) { + if (StorageClassSpec != SCS_unspecified) + return BadSpecifier(StorageClassSpec, PrevSpec); + StorageClassSpec = S; + StorageClassSpecLoc = Loc; + return false; +} + +bool DeclSpec::SetStorageClassSpecThread(SourceLocation Loc, + const char *&PrevSpec) { + if (SCS_thread_specified) { + PrevSpec = "__thread"; + return true; + } + SCS_thread_specified = true; + SCS_threadLoc = Loc; + return false; +} + + +/// These methods set the specified attribute of the DeclSpec, 
but return true +/// and ignore the request if invalid (e.g. "extern" then "auto" is +/// specified). +bool DeclSpec::SetTypeSpecWidth(TSW W, SourceLocation Loc, + const char *&PrevSpec) { + if (TypeSpecWidth != TSW_unspecified && + // Allow turning long -> long long. + (W != TSW_longlong || TypeSpecWidth != TSW_long)) + return BadSpecifier(TypeSpecWidth, PrevSpec); + TypeSpecWidth = W; + TSWLoc = Loc; + return false; +} + +bool DeclSpec::SetTypeSpecComplex(TSC C, SourceLocation Loc, + const char *&PrevSpec) { + if (TypeSpecComplex != TSC_unspecified) + return BadSpecifier(TypeSpecComplex, PrevSpec); + TypeSpecComplex = C; + TSCLoc = Loc; + return false; +} + +bool DeclSpec::SetTypeSpecSign(TSS S, SourceLocation Loc, + const char *&PrevSpec) { + if (TypeSpecSign != TSS_unspecified) + return BadSpecifier(TypeSpecSign, PrevSpec); + TypeSpecSign = S; + TSSLoc = Loc; + return false; +} + +bool DeclSpec::SetTypeSpecType(TST T, SourceLocation Loc, + const char *&PrevSpec, void *Rep) { + if (TypeSpecType != TST_unspecified) + return BadSpecifier(TypeSpecType, PrevSpec); + TypeSpecType = T; + TypeRep = Rep; + TSTLoc = Loc; + return false; +} + +bool DeclSpec::SetTypeQual(TQ T, SourceLocation Loc, const char *&PrevSpec, + const LangOptions &Lang) { + // Duplicates turn into warnings pre-C99. + if ((TypeQualifiers & T) && !Lang.C99) + return BadSpecifier(T, PrevSpec); + TypeQualifiers |= T; + + switch (T) { + default: assert(0 && "Unknown type qualifier!"); + case TQ_const: TQ_constLoc = Loc; break; + case TQ_restrict: TQ_restrictLoc = Loc; break; + case TQ_volatile: TQ_volatileLoc = Loc; break; + } + return false; +} + +bool DeclSpec::SetFunctionSpecInline(SourceLocation Loc, const char *&PrevSpec){ + // 'inline inline' is ok. + FS_inline_specified = true; + FS_inlineLoc = Loc; + return false; +} + + +/// Finish - This does final analysis of the declspec, rejecting things like +/// "_Imaginary" (lacking an FP type). 
This returns a diagnostic to issue or +/// diag::NUM_DIAGNOSTICS if there is no error. After calling this method, +/// DeclSpec is guaranteed self-consistent, even if an error occurred. +void DeclSpec::Finish(Diagnostic &D, const LangOptions &Lang) { + // Check the type specifier components first. + + // signed/unsigned are only valid with int/char. + if (TypeSpecSign != TSS_unspecified) { + if (TypeSpecType == TST_unspecified) + TypeSpecType = TST_int; // unsigned -> unsigned int, signed -> signed int. + else if (TypeSpecType != TST_int && TypeSpecType != TST_char) { + Diag(D, TSSLoc, diag::err_invalid_sign_spec, + getSpecifierName(TypeSpecType)); + // signed double -> double. + TypeSpecSign = TSS_unspecified; + } + } + + // Validate the width of the type. + switch (TypeSpecWidth) { + case TSW_unspecified: break; + case TSW_short: // short int + case TSW_longlong: // long long int + if (TypeSpecType == TST_unspecified) + TypeSpecType = TST_int; // short -> short int, long long -> long long int. + else if (TypeSpecType != TST_int) { + Diag(D, TSWLoc, + TypeSpecWidth == TSW_short ? diag::err_invalid_short_spec + : diag::err_invalid_longlong_spec, + getSpecifierName(TypeSpecType)); + TypeSpecType = TST_int; + } + break; + case TSW_long: // long double, long int + if (TypeSpecType == TST_unspecified) + TypeSpecType = TST_int; // long -> long int. + else if (TypeSpecType != TST_int && TypeSpecType != TST_double) { + Diag(D, TSWLoc, diag::err_invalid_long_spec, + getSpecifierName(TypeSpecType)); + TypeSpecType = TST_int; + } + break; + } + + // TODO: if the implementation does not implement _Complex or _Imaginary, + // disallow their use. Need information about the backend. + if (TypeSpecComplex != TSC_unspecified) { + if (TypeSpecType == TST_unspecified) { + Diag(D, TSCLoc, diag::ext_plain_complex); + TypeSpecType = TST_double; // _Complex -> _Complex double. 
+ } else if (TypeSpecType == TST_int || TypeSpecType == TST_char) { + // Note that this intentionally doesn't include _Complex _Bool. + Diag(D, TSTLoc, diag::ext_integer_complex); + } else if (TypeSpecType != TST_float && TypeSpecType != TST_double) { + Diag(D, TSCLoc, diag::err_invalid_complex_spec, + getSpecifierName(TypeSpecType)); + TypeSpecComplex = TSC_unspecified; + } + } + + // Verify __thread. + if (SCS_thread_specified) { + if (StorageClassSpec == SCS_unspecified) { + StorageClassSpec = SCS_extern; // '__thread int' -> 'extern __thread int' + } else if (StorageClassSpec != SCS_extern && + StorageClassSpec != SCS_static) { + Diag(D, getStorageClassSpecLoc(), diag::err_invalid_thread_spec, + getSpecifierName(StorageClassSpec)); + SCS_thread_specified = false; + } + } + + // Okay, now we can infer the real type. + // TODO: infer real type. + + // TODO: return "auto function" and other bad things based on the real type. + + // 'data definition has no type or storage class'? +} diff --git a/Parse/Makefile b/Parse/Makefile new file mode 100644 index 0000000000..60e70f240a --- /dev/null +++ b/Parse/Makefile @@ -0,0 +1,22 @@ +##===- clang/Parse/Makefile --------------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file was developed by Chris Lattner and is distributed under +# the University of Illinois Open Source License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +# +# This implements the Parser library for the C-Language front-end. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. 
+LIBRARYNAME := clangParse +BUILD_ARCHIVE = 1 +CXXFLAGS = -fno-rtti + +CPPFLAGS += -I$(PROJ_SRC_DIR)/../include + +include $(LEVEL)/Makefile.common + diff --git a/Parse/MinimalAction.cpp b/Parse/MinimalAction.cpp new file mode 100644 index 0000000000..65bdca64db --- /dev/null +++ b/Parse/MinimalAction.cpp @@ -0,0 +1,105 @@ +//===--- MinimalAction.cpp - Implement the MinimalAction class ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the MinimalAction interface. +// +//===----------------------------------------------------------------------===// + +#include "clang/Parse/Parser.h" +#include "clang/Parse/DeclSpec.h" +#include "clang/Parse/Scope.h" +using namespace clang; + +/// TypeNameInfo - A link exists here for each scope that an identifier is +/// defined. +struct TypeNameInfo { + TypeNameInfo *Prev; + bool isTypeName; + + TypeNameInfo(bool istypename, TypeNameInfo *prev) { + isTypeName = istypename; + Prev = prev; + } +}; + +/// isTypeName - This looks at the IdentifierInfo::FETokenInfo field to +/// determine whether the name is a type name (objc class name or typedef) or +/// not in this scope. +Action::DeclTy * +MinimalAction::isTypeName(const IdentifierInfo &II, Scope *S) const { + if (TypeNameInfo *TI = II.getFETokenInfo<TypeNameInfo>()) + if (TI->isTypeName) + return TI; + return 0; +} + +/// ParseDeclarator - If this is a typedef declarator, we modify the +/// IdentifierInfo::FETokenInfo field to keep track of this fact, until S is +/// popped. +Action::DeclTy * +MinimalAction::ParseDeclarator(Scope *S, Declarator &D, ExprTy *Init, + DeclTy *LastInGroup) { + IdentifierInfo *II = D.getIdentifier(); + + // If there is no identifier associated with this declarator, bail out. 
+ if (II == 0) return 0; + + TypeNameInfo *weCurrentlyHaveTypeInfo = II->getFETokenInfo<TypeNameInfo>(); + bool isTypeName = + D.getDeclSpec().getStorageClassSpec() == DeclSpec::SCS_typedef; + + // this check avoids creating TypeNameInfo objects for the common case. + // It does need to handle the uncommon case of shadowing a typedef name with a + // non-typedef name. e.g. { typedef int a; a xx; { int a; } } + if (weCurrentlyHaveTypeInfo || isTypeName) { + TypeNameInfo *TI = new TypeNameInfo(isTypeName, weCurrentlyHaveTypeInfo); + + II->setFETokenInfo(TI); + + // Remember that this needs to be removed when the scope is popped. + S->AddDecl(II); + } + return 0; +} + +/// ParsedObjcClassDeclaration - +/// Scope will always be top level file scope. +Action::DeclTy * +MinimalAction::ParsedObjcClassDeclaration(Scope *S, + IdentifierInfo **IdentList, + unsigned NumElts) { + for (unsigned i = 0; i != NumElts; ++i) { + TypeNameInfo *TI = + new TypeNameInfo(1, IdentList[i]->getFETokenInfo<TypeNameInfo>()); + + IdentList[i]->setFETokenInfo(TI); + + // Remember that this needs to be removed when the scope is popped. + S->AddDecl(IdentList[i]); + } + return 0; +} + +/// PopScope - When a scope is popped, if any typedefs are now out-of-scope, +/// they are removed from the IdentifierInfo::FETokenInfo field. 
+void MinimalAction::PopScope(SourceLocation Loc, Scope *S) { + for (Scope::decl_iterator I = S->decl_begin(), E = S->decl_end(); + I != E; ++I) { + IdentifierInfo &II = *static_cast<IdentifierInfo*>(*I); + TypeNameInfo *TI = II.getFETokenInfo<TypeNameInfo>(); + assert(TI && "This decl didn't get pushed??"); + + if (TI) { + TypeNameInfo *Next = TI->Prev; + delete TI; + + II.setFETokenInfo(Next); + } + } +} diff --git a/Parse/ParseDecl.cpp b/Parse/ParseDecl.cpp new file mode 100644 index 0000000000..02a79e68e5 --- /dev/null +++ b/Parse/ParseDecl.cpp @@ -0,0 +1,1396 @@ +//===--- ParseDecl.cpp - Declaration Parsing ------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Declaration portions of the Parser interfaces. +// +//===----------------------------------------------------------------------===// + +#include "clang/Parse/Parser.h" +#include "clang/Parse/DeclSpec.h" +#include "llvm/ADT/SmallSet.h" +using namespace clang; + +//===----------------------------------------------------------------------===// +// C99 6.7: Declarations. +//===----------------------------------------------------------------------===// + +/// ParseTypeName +/// type-name: [C99 6.7.6] +/// specifier-qualifier-list abstract-declarator[opt] +Parser::TypeTy *Parser::ParseTypeName() { + // Parse the common declaration-specifiers piece. + DeclSpec DS; + ParseSpecifierQualifierList(DS); + + // Parse the abstract-declarator, if present. + Declarator DeclaratorInfo(DS, Declarator::TypeNameContext); + ParseDeclarator(DeclaratorInfo); + + return Actions.ParseTypeName(CurScope, DeclaratorInfo).Val; +} + +/// ParseAttributes - Parse a non-empty attributes list. 
+/// +/// [GNU] attributes: +/// attribute +/// attributes attribute +/// +/// [GNU] attribute: +/// '__attribute__' '(' '(' attribute-list ')' ')' +/// +/// [GNU] attribute-list: +/// attrib +/// attribute_list ',' attrib +/// +/// [GNU] attrib: +/// empty +/// attrib-name +/// attrib-name '(' identifier ')' +/// attrib-name '(' identifier ',' nonempty-expr-list ')' +/// attrib-name '(' argument-expression-list [C99 6.5.2] ')' +/// +/// [GNU] attrib-name: +/// identifier +/// typespec +/// typequal +/// storageclass +/// +/// FIXME: The GCC grammar/code for this construct implies we need two +/// token lookahead. Comment from gcc: "If they start with an identifier +/// which is followed by a comma or close parenthesis, then the arguments +/// start with that identifier; otherwise they are an expression list." +/// +/// At the moment, I am not doing 2 token lookahead. I am also unaware of +/// any attributes that don't work (based on my limited testing). Most +/// attributes are very simple in practice. Until we find a bug, I don't see +/// a pressing need to implement the 2 token lookahead. + +AttributeList *Parser::ParseAttributes() { + assert(Tok.getKind() == tok::kw___attribute && "Not an attribute list!"); + + AttributeList *CurrAttr = 0; + + while (Tok.getKind() == tok::kw___attribute) { + ConsumeToken(); + if (ExpectAndConsume(tok::l_paren, diag::err_expected_lparen_after, + "attribute")) { + SkipUntil(tok::r_paren, true); // skip until ) or ; + return CurrAttr; + } + if (ExpectAndConsume(tok::l_paren, diag::err_expected_lparen_after, "(")) { + SkipUntil(tok::r_paren, true); // skip until ) or ; + return CurrAttr; + } + // Parse the attribute-list. e.g. __attribute__(( weak, alias("__f") )) + while (Tok.getKind() == tok::identifier || isDeclarationSpecifier() || + Tok.getKind() == tok::comma) { + + if (Tok.getKind() == tok::comma) { + // allows for empty/non-empty attributes. 
((__vector_size__(16),,,,)) + ConsumeToken(); + continue; + } + // we have an identifier or declaration specifier (const, int, etc.) + IdentifierInfo *AttrName = Tok.getIdentifierInfo(); + SourceLocation AttrNameLoc = ConsumeToken(); + + // check if we have a "paramterized" attribute + if (Tok.getKind() == tok::l_paren) { + ConsumeParen(); // ignore the left paren loc for now + + if (Tok.getKind() == tok::identifier) { + IdentifierInfo *ParmName = Tok.getIdentifierInfo(); + SourceLocation ParmLoc = ConsumeToken(); + + if (Tok.getKind() == tok::r_paren) { + // __attribute__(( mode(byte) )) + ConsumeParen(); // ignore the right paren loc for now + CurrAttr = new AttributeList(AttrName, AttrNameLoc, + ParmName, ParmLoc, 0, 0, CurrAttr); + } else if (Tok.getKind() == tok::comma) { + ConsumeToken(); + // __attribute__(( format(printf, 1, 2) )) + llvm::SmallVector<ExprTy*, 8> ArgExprs; + bool ArgExprsOk = true; + + // now parse the non-empty comma separated list of expressions + while (1) { + ExprResult ArgExpr = ParseAssignmentExpression(); + if (ArgExpr.isInvalid) { + ArgExprsOk = false; + SkipUntil(tok::r_paren); + break; + } else { + ArgExprs.push_back(ArgExpr.Val); + } + if (Tok.getKind() != tok::comma) + break; + ConsumeToken(); // Eat the comma, move to the next argument + } + if (ArgExprsOk && Tok.getKind() == tok::r_paren) { + ConsumeParen(); // ignore the right paren loc for now + CurrAttr = new AttributeList(AttrName, AttrNameLoc, ParmName, + ParmLoc, &ArgExprs[0], ArgExprs.size(), CurrAttr); + } + } + } else { // not an identifier + // parse a possibly empty comma separated list of expressions + if (Tok.getKind() == tok::r_paren) { + // __attribute__(( nonnull() )) + ConsumeParen(); // ignore the right paren loc for now + CurrAttr = new AttributeList(AttrName, AttrNameLoc, + 0, SourceLocation(), 0, 0, CurrAttr); + } else { + // __attribute__(( aligned(16) )) + llvm::SmallVector<ExprTy*, 8> ArgExprs; + bool ArgExprsOk = true; + + // now parse the list of 
expressions + while (1) { + ExprResult ArgExpr = ParseAssignmentExpression(); + if (ArgExpr.isInvalid) { + ArgExprsOk = false; + SkipUntil(tok::r_paren); + break; + } else { + ArgExprs.push_back(ArgExpr.Val); + } + if (Tok.getKind() != tok::comma) + break; + ConsumeToken(); // Eat the comma, move to the next argument + } + // Match the ')'. + if (ArgExprsOk && Tok.getKind() == tok::r_paren) { + ConsumeParen(); // ignore the right paren loc for now + CurrAttr = new AttributeList(AttrName, AttrNameLoc, 0, + SourceLocation(), &ArgExprs[0], ArgExprs.size(), + CurrAttr); + } + } + } + } else { + CurrAttr = new AttributeList(AttrName, AttrNameLoc, + 0, SourceLocation(), 0, 0, CurrAttr); + } + } + if (ExpectAndConsume(tok::r_paren, diag::err_expected_rparen)) + SkipUntil(tok::r_paren, false); + if (ExpectAndConsume(tok::r_paren, diag::err_expected_rparen)) + SkipUntil(tok::r_paren, false); + } + return CurrAttr; +} + +/// ParseDeclaration - Parse a full 'declaration', which consists of +/// declaration-specifiers, some number of declarators, and a semicolon. +/// 'Context' should be a Declarator::TheContext value. +Parser::DeclTy *Parser::ParseDeclaration(unsigned Context) { + // Parse the common declaration-specifiers piece. + DeclSpec DS; + ParseDeclarationSpecifiers(DS); + + // C99 6.7.2.3p6: Handle "struct-or-union identifier;", "enum { X };" + // declaration-specifiers init-declarator-list[opt] ';' + if (Tok.getKind() == tok::semi) { + ConsumeToken(); + return Actions.ParsedFreeStandingDeclSpec(CurScope, DS); + } + + Declarator DeclaratorInfo(DS, (Declarator::TheContext)Context); + ParseDeclarator(DeclaratorInfo); + + return ParseInitDeclaratorListAfterFirstDeclarator(DeclaratorInfo); +} + +/// ParseInitDeclaratorListAfterFirstDeclarator - Parse 'declaration' after +/// parsing 'declaration-specifiers declarator'. This method is split out this +/// way to handle the ambiguity between top-level function-definitions and +/// declarations. 
+/// +/// declaration: [C99 6.7] +/// declaration-specifiers init-declarator-list[opt] ';' [TODO] +/// [!C99] init-declarator-list ';' [TODO] +/// [OMP] threadprivate-directive [TODO] +/// +/// init-declarator-list: [C99 6.7] +/// init-declarator +/// init-declarator-list ',' init-declarator +/// init-declarator: [C99 6.7] +/// declarator +/// declarator '=' initializer +/// [GNU] declarator simple-asm-expr[opt] attributes[opt] +/// [GNU] declarator simple-asm-expr[opt] attributes[opt] '=' initializer +/// +Parser::DeclTy *Parser:: +ParseInitDeclaratorListAfterFirstDeclarator(Declarator &D) { + + // Declarators may be grouped together ("int X, *Y, Z();"). Provide info so + // that they can be chained properly if the actions want this. + Parser::DeclTy *LastDeclInGroup = 0; + + // At this point, we know that it is not a function definition. Parse the + // rest of the init-declarator-list. + while (1) { + // If a simple-asm-expr is present, parse it. + if (Tok.getKind() == tok::kw_asm) + ParseSimpleAsm(); + + // If attributes are present, parse them. + if (Tok.getKind() == tok::kw___attribute) + D.AddAttributes(ParseAttributes()); + + // Parse declarator '=' initializer. + ExprResult Init; + if (Tok.getKind() == tok::equal) { + ConsumeToken(); + Init = ParseInitializer(); + if (Init.isInvalid) { + SkipUntil(tok::semi); + return 0; + } + } + + // Inform the current actions module that we just parsed this declarator. + // FIXME: pass asm & attributes. + LastDeclInGroup = Actions.ParseDeclarator(CurScope, D, Init.Val, + LastDeclInGroup); + + // If we don't have a comma, it is either the end of the list (a ';') or an + // error, bail out. + if (Tok.getKind() != tok::comma) + break; + + // Consume the comma. + ConsumeToken(); + + // Parse the next declarator. 
+ D.clear(); + ParseDeclarator(D); + } + + if (Tok.getKind() == tok::semi) { + ConsumeToken(); + return Actions.FinalizeDeclaratorGroup(CurScope, LastDeclInGroup); + } + + Diag(Tok, diag::err_parse_error); + // Skip to end of block or statement + SkipUntil(tok::r_brace, true); + if (Tok.getKind() == tok::semi) + ConsumeToken(); + return 0; +} + +/// ParseSpecifierQualifierList +/// specifier-qualifier-list: +/// type-specifier specifier-qualifier-list[opt] +/// type-qualifier specifier-qualifier-list[opt] +/// [GNU] attributes specifier-qualifier-list[opt] +/// +void Parser::ParseSpecifierQualifierList(DeclSpec &DS) { + /// specifier-qualifier-list is a subset of declaration-specifiers. Just + /// parse declaration-specifiers and complain about extra stuff. + SourceLocation Loc = Tok.getLocation(); + ParseDeclarationSpecifiers(DS); + + // Validate declspec for type-name. + unsigned Specs = DS.getParsedSpecifiers(); + if (Specs == DeclSpec::PQ_None) + Diag(Tok, diag::err_typename_requires_specqual); + + // Issue diagnostic and remove storage class if present. + if (Specs & DeclSpec::PQ_StorageClassSpecifier) { + if (DS.getStorageClassSpecLoc().isValid()) + Diag(DS.getStorageClassSpecLoc(),diag::err_typename_invalid_storageclass); + else + Diag(DS.getThreadSpecLoc(), diag::err_typename_invalid_storageclass); + DS.ClearStorageClassSpecs(); + } + + // Issue diagnostic and remove function specfier if present. 
+ if (Specs & DeclSpec::PQ_FunctionSpecifier) { + Diag(DS.getInlineSpecLoc(), diag::err_typename_invalid_functionspec); + DS.ClearFunctionSpecs(); + } +} + +/// ParseDeclarationSpecifiers +/// declaration-specifiers: [C99 6.7] +/// storage-class-specifier declaration-specifiers[opt] +/// type-specifier declaration-specifiers[opt] +/// type-qualifier declaration-specifiers[opt] +/// [C99] function-specifier declaration-specifiers[opt] +/// [GNU] attributes declaration-specifiers[opt] +/// +/// storage-class-specifier: [C99 6.7.1] +/// 'typedef' +/// 'extern' +/// 'static' +/// 'auto' +/// 'register' +/// [GNU] '__thread' +/// type-specifier: [C99 6.7.2] +/// 'void' +/// 'char' +/// 'short' +/// 'int' +/// 'long' +/// 'float' +/// 'double' +/// 'signed' +/// 'unsigned' +/// struct-or-union-specifier +/// enum-specifier +/// typedef-name +/// [C++] 'bool' +/// [C99] '_Bool' +/// [C99] '_Complex' +/// [C99] '_Imaginary' // Removed in TC2? +/// [GNU] '_Decimal32' +/// [GNU] '_Decimal64' +/// [GNU] '_Decimal128' +/// [GNU] typeof-specifier [TODO] +/// [OBJC] class-name objc-protocol-refs[opt] [TODO] +/// [OBJC] typedef-name objc-protocol-refs [TODO] +/// [OBJC] objc-protocol-refs [TODO] +/// type-qualifier: +/// 'const' +/// 'volatile' +/// [C99] 'restrict' +/// function-specifier: [C99 6.7.4] +/// [C99] 'inline' +/// +void Parser::ParseDeclarationSpecifiers(DeclSpec &DS) { + while (1) { + int isInvalid = false; + const char *PrevSpec = 0; + SourceLocation Loc = Tok.getLocation(); + + switch (Tok.getKind()) { + // typedef-name + case tok::identifier: + // This identifier can only be a typedef name if we haven't already seen + // a type-specifier. Without this check we misparse: + // typedef int X; struct Y { short X; }; as 'short int'. + if (!DS.hasTypeSpecifier()) { + // It has to be available as a typedef too! 
+ if (void *TypeRep = Actions.isTypeName(*Tok.getIdentifierInfo(), + CurScope)) { + isInvalid = DS.SetTypeSpecType(DeclSpec::TST_typedef, Loc, PrevSpec, + TypeRep); + break; + } + } + // FALL THROUGH. + default: + // If this is not a declaration specifier token, we're done reading decl + // specifiers. First verify that DeclSpec's are consistent. + DS.Finish(Diags, getLang()); + return; + + // GNU attributes support. + case tok::kw___attribute: + DS.AddAttributes(ParseAttributes()); + continue; + + // storage-class-specifier + case tok::kw_typedef: + isInvalid = DS.SetStorageClassSpec(DeclSpec::SCS_typedef, Loc, PrevSpec); + break; + case tok::kw_extern: + if (DS.isThreadSpecified()) + Diag(Tok, diag::ext_thread_before, "extern"); + isInvalid = DS.SetStorageClassSpec(DeclSpec::SCS_extern, Loc, PrevSpec); + break; + case tok::kw_static: + if (DS.isThreadSpecified()) + Diag(Tok, diag::ext_thread_before, "static"); + isInvalid = DS.SetStorageClassSpec(DeclSpec::SCS_static, Loc, PrevSpec); + break; + case tok::kw_auto: + isInvalid = DS.SetStorageClassSpec(DeclSpec::SCS_auto, Loc, PrevSpec); + break; + case tok::kw_register: + isInvalid = DS.SetStorageClassSpec(DeclSpec::SCS_register, Loc, PrevSpec); + break; + case tok::kw___thread: + isInvalid = DS.SetStorageClassSpecThread(Loc, PrevSpec)*2; + break; + + // type-specifiers + case tok::kw_short: + isInvalid = DS.SetTypeSpecWidth(DeclSpec::TSW_short, Loc, PrevSpec); + break; + case tok::kw_long: + if (DS.getTypeSpecWidth() != DeclSpec::TSW_long) + isInvalid = DS.SetTypeSpecWidth(DeclSpec::TSW_long, Loc, PrevSpec); + else + isInvalid = DS.SetTypeSpecWidth(DeclSpec::TSW_longlong, Loc, PrevSpec); + break; + case tok::kw_signed: + isInvalid = DS.SetTypeSpecSign(DeclSpec::TSS_signed, Loc, PrevSpec); + break; + case tok::kw_unsigned: + isInvalid = DS.SetTypeSpecSign(DeclSpec::TSS_unsigned, Loc, PrevSpec); + break; + case tok::kw__Complex: + isInvalid = DS.SetTypeSpecComplex(DeclSpec::TSC_complex, Loc, PrevSpec); + break; + 
case tok::kw__Imaginary: + isInvalid = DS.SetTypeSpecComplex(DeclSpec::TSC_imaginary, Loc, PrevSpec); + break; + case tok::kw_void: + isInvalid = DS.SetTypeSpecType(DeclSpec::TST_void, Loc, PrevSpec); + break; + case tok::kw_char: + isInvalid = DS.SetTypeSpecType(DeclSpec::TST_char, Loc, PrevSpec); + break; + case tok::kw_int: + isInvalid = DS.SetTypeSpecType(DeclSpec::TST_int, Loc, PrevSpec); + break; + case tok::kw_float: + isInvalid = DS.SetTypeSpecType(DeclSpec::TST_float, Loc, PrevSpec); + break; + case tok::kw_double: + isInvalid = DS.SetTypeSpecType(DeclSpec::TST_double, Loc, PrevSpec); + break; + case tok::kw_bool: // [C++ 2.11p1] + case tok::kw__Bool: + isInvalid = DS.SetTypeSpecType(DeclSpec::TST_bool, Loc, PrevSpec); + break; + case tok::kw__Decimal32: + isInvalid = DS.SetTypeSpecType(DeclSpec::TST_decimal32, Loc, PrevSpec); + break; + case tok::kw__Decimal64: + isInvalid = DS.SetTypeSpecType(DeclSpec::TST_decimal64, Loc, PrevSpec); + break; + case tok::kw__Decimal128: + isInvalid = DS.SetTypeSpecType(DeclSpec::TST_decimal128, Loc, PrevSpec); + break; + + case tok::kw_struct: + case tok::kw_union: + ParseStructUnionSpecifier(DS); + continue; + case tok::kw_enum: + ParseEnumSpecifier(DS); + continue; + + // type-qualifier + case tok::kw_const: + isInvalid = DS.SetTypeQual(DeclSpec::TQ_const , Loc, PrevSpec, + getLang())*2; + break; + case tok::kw_volatile: + isInvalid = DS.SetTypeQual(DeclSpec::TQ_volatile, Loc, PrevSpec, + getLang())*2; + break; + case tok::kw_restrict: + isInvalid = DS.SetTypeQual(DeclSpec::TQ_restrict, Loc, PrevSpec, + getLang())*2; + break; + + // function-specifier + case tok::kw_inline: + isInvalid = DS.SetFunctionSpecInline(Loc, PrevSpec); + break; + } + // If the specifier combination wasn't legal, issue a diagnostic. + if (isInvalid) { + assert(PrevSpec && "Method did not return previous specifier!"); + if (isInvalid == 1) // Error. + Diag(Tok, diag::err_invalid_decl_spec_combination, PrevSpec); + else // extwarn. 
+ Diag(Tok, diag::ext_duplicate_declspec, PrevSpec); + } + ConsumeToken(); + } +} + +/// ParseTag - Parse "struct-or-union-or-class-or-enum identifier[opt]", where +/// the first token has already been read and has been turned into an instance +/// of DeclSpec::TST (TagType). This returns true if there is an error parsing, +/// otherwise it returns false and fills in Decl. +bool Parser::ParseTag(DeclTy *&Decl, unsigned TagType, SourceLocation StartLoc){ + AttributeList *Attr = 0; + // If attributes exist after tag, parse them. + if (Tok.getKind() == tok::kw___attribute) + Attr = ParseAttributes(); + + // Must have either 'struct name' or 'struct {...}'. + if (Tok.getKind() != tok::identifier && + Tok.getKind() != tok::l_brace) { + Diag(Tok, diag::err_expected_ident_lbrace); + // TODO: better error recovery here. + return true; + } + + // If an identifier is present, consume and remember it. + IdentifierInfo *Name = 0; + SourceLocation NameLoc; + if (Tok.getKind() == tok::identifier) { + Name = Tok.getIdentifierInfo(); + NameLoc = ConsumeToken(); + } + + // There are three options here. If we have 'struct foo;', then this is a + // forward declaration. If we have 'struct foo {...' then this is a + // definition. Otherwise we have something like 'struct foo xyz', a reference. + // + // This is needed to handle stuff like this right (C99 6.7.2.3p11): + // struct foo {..}; void bar() { struct foo; } <- new foo in bar. + // struct foo {..}; void bar() { struct foo x; } <- use of old foo. 
+ // + Action::TagKind TK; + if (Tok.getKind() == tok::l_brace) + TK = Action::TK_Definition; + else if (Tok.getKind() == tok::semi) + TK = Action::TK_Declaration; + else + TK = Action::TK_Reference; + Decl = Actions.ParseTag(CurScope, TagType, TK, StartLoc, Name, NameLoc, Attr); + return false; +} + + +/// ParseStructUnionSpecifier +/// struct-or-union-specifier: [C99 6.7.2.1] +/// struct-or-union identifier[opt] '{' struct-contents '}' +/// struct-or-union identifier +/// [GNU] struct-or-union attributes[opt] identifier[opt] '{' struct-contents +/// '}' attributes[opt] +/// [GNU] struct-or-union attributes[opt] identifier +/// struct-or-union: +/// 'struct' +/// 'union' +/// +void Parser::ParseStructUnionSpecifier(DeclSpec &DS) { + assert((Tok.getKind() == tok::kw_struct || + Tok.getKind() == tok::kw_union) && "Not a struct/union specifier"); + DeclSpec::TST TagType = + Tok.getKind() == tok::kw_union ? DeclSpec::TST_union : DeclSpec::TST_struct; + SourceLocation StartLoc = ConsumeToken(); + + // Parse the tag portion of this. + DeclTy *TagDecl; + if (ParseTag(TagDecl, TagType, StartLoc)) + return; + + // If there is a body, parse it and inform the actions module. 
+ if (Tok.getKind() == tok::l_brace) + ParseStructUnionBody(StartLoc, TagType, TagDecl); + + const char *PrevSpec = 0; + if (DS.SetTypeSpecType(TagType, StartLoc, PrevSpec, TagDecl)) + Diag(StartLoc, diag::err_invalid_decl_spec_combination, PrevSpec); +} + + +/// ParseStructUnionBody +/// struct-contents: +/// struct-declaration-list +/// [EXT] empty +/// [GNU] "struct-declaration-list" without terminatoring ';' +/// struct-declaration-list: +/// struct-declaration +/// struct-declaration-list struct-declaration +/// [OBC] '@' 'defs' '(' class-name ')' [TODO] +/// struct-declaration: +/// specifier-qualifier-list struct-declarator-list ';' +/// [GNU] __extension__ struct-declaration +/// [GNU] specifier-qualifier-list ';' +/// struct-declarator-list: +/// struct-declarator +/// struct-declarator-list ',' struct-declarator +/// [GNU] struct-declarator-list ',' attributes[opt] struct-declarator +/// struct-declarator: +/// declarator +/// [GNU] declarator attributes[opt] +/// declarator[opt] ':' constant-expression +/// [GNU] declarator[opt] ':' constant-expression attributes[opt] +/// +void Parser::ParseStructUnionBody(SourceLocation RecordLoc, + unsigned TagType, DeclTy *TagDecl) { + SourceLocation LBraceLoc = ConsumeBrace(); + + // Empty structs are an extension in C (C99 6.7.2.1p7), but are allowed in + // C++. + if (Tok.getKind() == tok::r_brace) + Diag(Tok, diag::ext_empty_struct_union_enum, + DeclSpec::getSpecifierName((DeclSpec::TST)TagType)); + + llvm::SmallVector<DeclTy*, 32> FieldDecls; + + // While we still have something to read, read the declarations in the struct. + while (Tok.getKind() != tok::r_brace && + Tok.getKind() != tok::eof) { + // Each iteration of this loop reads one struct-declaration. + + // Check for extraneous top-level semicolon. + if (Tok.getKind() == tok::semi) { + Diag(Tok, diag::ext_extra_struct_semi); + ConsumeToken(); + continue; + } + + // FIXME: When __extension__ is specified, disable extension diagnostics. 
+ if (Tok.getKind() == tok::kw___extension__) + ConsumeToken(); + + // Parse the common specifier-qualifiers-list piece. + DeclSpec DS; + SourceLocation SpecQualLoc = Tok.getLocation(); + ParseSpecifierQualifierList(DS); + // TODO: Does specifier-qualifier list correctly check that *something* is + // specified? + + // If there are no declarators, issue a warning. + if (Tok.getKind() == tok::semi) { + Diag(SpecQualLoc, diag::w_no_declarators); + ConsumeToken(); + continue; + } + + // Read struct-declarators until we find the semicolon. + Declarator DeclaratorInfo(DS, Declarator::MemberContext); + + while (1) { + /// struct-declarator: declarator + /// struct-declarator: declarator[opt] ':' constant-expression + if (Tok.getKind() != tok::colon) + ParseDeclarator(DeclaratorInfo); + + ExprTy *BitfieldSize = 0; + if (Tok.getKind() == tok::colon) { + ConsumeToken(); + ExprResult Res = ParseConstantExpression(); + if (Res.isInvalid) { + SkipUntil(tok::semi, true, true); + } else { + BitfieldSize = Res.Val; + } + } + + // If attributes exist after the declarator, parse them. + if (Tok.getKind() == tok::kw___attribute) + DeclaratorInfo.AddAttributes(ParseAttributes()); + + // Install the declarator into the current TagDecl. + DeclTy *Field = Actions.ParseField(CurScope, TagDecl, SpecQualLoc, + DeclaratorInfo, BitfieldSize); + FieldDecls.push_back(Field); + + // If we don't have a comma, it is either the end of the list (a ';') + // or an error, bail out. + if (Tok.getKind() != tok::comma) + break; + + // Consume the comma. + ConsumeToken(); + + // Parse the next declarator. + DeclaratorInfo.clear(); + + // Attributes are only allowed on the second declarator. 
+ if (Tok.getKind() == tok::kw___attribute) + DeclaratorInfo.AddAttributes(ParseAttributes()); + } + + if (Tok.getKind() == tok::semi) { + ConsumeToken(); + } else if (Tok.getKind() == tok::r_brace) { + Diag(Tok.getLocation(), diag::ext_expected_semi_decl_list); + break; + } else { + Diag(Tok, diag::err_expected_semi_decl_list); + // Skip to end of block or statement + SkipUntil(tok::r_brace, true, true); + } + } + + MatchRHSPunctuation(tok::r_brace, LBraceLoc); + + Actions.ParseRecordBody(RecordLoc, TagDecl, &FieldDecls[0],FieldDecls.size()); + + AttributeList *AttrList = 0; + // If attributes exist after struct contents, parse them. + if (Tok.getKind() == tok::kw___attribute) + AttrList = ParseAttributes(); // FIXME: where should I put them? +} + + +/// ParseEnumSpecifier +/// enum-specifier: [C99 6.7.2.2] +/// 'enum' identifier[opt] '{' enumerator-list '}' +/// [C99] 'enum' identifier[opt] '{' enumerator-list ',' '}' +/// [GNU] 'enum' attributes[opt] identifier[opt] '{' enumerator-list ',' [opt] +/// '}' attributes[opt] +/// 'enum' identifier +/// [GNU] 'enum' attributes[opt] identifier +void Parser::ParseEnumSpecifier(DeclSpec &DS) { + assert(Tok.getKind() == tok::kw_enum && "Not an enum specifier"); + SourceLocation StartLoc = ConsumeToken(); + + // Parse the tag portion of this. + DeclTy *TagDecl; + if (ParseTag(TagDecl, DeclSpec::TST_enum, StartLoc)) + return; + + if (Tok.getKind() == tok::l_brace) + ParseEnumBody(StartLoc, TagDecl); + + // TODO: semantic analysis on the declspec for enums. + const char *PrevSpec = 0; + if (DS.SetTypeSpecType(DeclSpec::TST_enum, StartLoc, PrevSpec, TagDecl)) + Diag(StartLoc, diag::err_invalid_decl_spec_combination, PrevSpec); +} + +/// ParseEnumBody - Parse a {} enclosed enumerator-list. 
+/// enumerator-list: +/// enumerator +/// enumerator-list ',' enumerator +/// enumerator: +/// enumeration-constant +/// enumeration-constant '=' constant-expression +/// enumeration-constant: +/// identifier +/// +void Parser::ParseEnumBody(SourceLocation StartLoc, DeclTy *EnumDecl) { + SourceLocation LBraceLoc = ConsumeBrace(); + + if (Tok.getKind() == tok::r_brace) + Diag(Tok, diag::ext_empty_struct_union_enum, "enum"); + + llvm::SmallVector<DeclTy*, 32> EnumConstantDecls; + + DeclTy *LastEnumConstDecl = 0; + + // Parse the enumerator-list. + while (Tok.getKind() == tok::identifier) { + IdentifierInfo *Ident = Tok.getIdentifierInfo(); + SourceLocation IdentLoc = ConsumeToken(); + + SourceLocation EqualLoc; + ExprTy *AssignedVal = 0; + if (Tok.getKind() == tok::equal) { + EqualLoc = ConsumeToken(); + ExprResult Res = ParseConstantExpression(); + if (Res.isInvalid) + SkipUntil(tok::comma, tok::r_brace, true, true); + else + AssignedVal = Res.Val; + } + + // Install the enumerator constant into EnumDecl. + DeclTy *EnumConstDecl = Actions.ParseEnumConstant(CurScope, EnumDecl, + LastEnumConstDecl, + IdentLoc, Ident, + EqualLoc, AssignedVal); + EnumConstantDecls.push_back(EnumConstDecl); + LastEnumConstDecl = EnumConstDecl; + + if (Tok.getKind() != tok::comma) + break; + SourceLocation CommaLoc = ConsumeToken(); + + if (Tok.getKind() != tok::identifier && !getLang().C99) + Diag(CommaLoc, diag::ext_c99_enumerator_list_comma); + } + + // Eat the }. + MatchRHSPunctuation(tok::r_brace, LBraceLoc); + + Actions.ParseEnumBody(StartLoc, EnumDecl, &EnumConstantDecls[0], + EnumConstantDecls.size()); + + DeclTy *AttrList = 0; + // If attributes exist after the identifier list, parse them. + if (Tok.getKind() == tok::kw___attribute) + AttrList = ParseAttributes(); // FIXME: where do they do? +} + +/// isTypeSpecifierQualifier - Return true if the current token could be the +/// start of a specifier-qualifier-list. 
+bool Parser::isTypeSpecifierQualifier() const { + switch (Tok.getKind()) { + default: return false; + // GNU attributes support. + case tok::kw___attribute: + // type-specifiers + case tok::kw_short: + case tok::kw_long: + case tok::kw_signed: + case tok::kw_unsigned: + case tok::kw__Complex: + case tok::kw__Imaginary: + case tok::kw_void: + case tok::kw_char: + case tok::kw_int: + case tok::kw_float: + case tok::kw_double: + case tok::kw__Bool: + case tok::kw__Decimal32: + case tok::kw__Decimal64: + case tok::kw__Decimal128: + + // struct-or-union-specifier + case tok::kw_struct: + case tok::kw_union: + // enum-specifier + case tok::kw_enum: + + // type-qualifier + case tok::kw_const: + case tok::kw_volatile: + case tok::kw_restrict: + return true; + + // typedef-name + case tok::identifier: + return Actions.isTypeName(*Tok.getIdentifierInfo(), CurScope) != 0; + + // TODO: Attributes. + } +} + +/// isDeclarationSpecifier() - Return true if the current token is part of a +/// declaration specifier. 
+bool Parser::isDeclarationSpecifier() const { + switch (Tok.getKind()) { + default: return false; + // storage-class-specifier + case tok::kw_typedef: + case tok::kw_extern: + case tok::kw_static: + case tok::kw_auto: + case tok::kw_register: + case tok::kw___thread: + + // type-specifiers + case tok::kw_short: + case tok::kw_long: + case tok::kw_signed: + case tok::kw_unsigned: + case tok::kw__Complex: + case tok::kw__Imaginary: + case tok::kw_void: + case tok::kw_char: + case tok::kw_int: + case tok::kw_float: + case tok::kw_double: + case tok::kw__Bool: + case tok::kw__Decimal32: + case tok::kw__Decimal64: + case tok::kw__Decimal128: + + // struct-or-union-specifier + case tok::kw_struct: + case tok::kw_union: + // enum-specifier + case tok::kw_enum: + + // type-qualifier + case tok::kw_const: + case tok::kw_volatile: + case tok::kw_restrict: + + // function-specifier + case tok::kw_inline: + return true; + + // typedef-name + case tok::identifier: + return Actions.isTypeName(*Tok.getIdentifierInfo(), CurScope) != 0; + // TODO: Attributes. + } +} + + +/// ParseTypeQualifierListOpt +/// type-qualifier-list: [C99 6.7.5] +/// type-qualifier +/// [GNU] attributes +/// type-qualifier-list type-qualifier +/// [GNU] type-qualifier-list attributes +/// +void Parser::ParseTypeQualifierListOpt(DeclSpec &DS) { + while (1) { + int isInvalid = false; + const char *PrevSpec = 0; + SourceLocation Loc = Tok.getLocation(); + + switch (Tok.getKind()) { + default: + // If this is not a type-qualifier token, we're done reading type + // qualifiers. First verify that DeclSpec's are consistent. 
+ DS.Finish(Diags, getLang()); + return; + case tok::kw_const: + isInvalid = DS.SetTypeQual(DeclSpec::TQ_const , Loc, PrevSpec, + getLang())*2; + break; + case tok::kw_volatile: + isInvalid = DS.SetTypeQual(DeclSpec::TQ_volatile, Loc, PrevSpec, + getLang())*2; + break; + case tok::kw_restrict: + isInvalid = DS.SetTypeQual(DeclSpec::TQ_restrict, Loc, PrevSpec, + getLang())*2; + break; + case tok::kw___attribute: + DS.AddAttributes(ParseAttributes()); + continue; // do *not* consume the next token! + } + + // If the specifier combination wasn't legal, issue a diagnostic. + if (isInvalid) { + assert(PrevSpec && "Method did not return previous specifier!"); + if (isInvalid == 1) // Error. + Diag(Tok, diag::err_invalid_decl_spec_combination, PrevSpec); + else // extwarn. + Diag(Tok, diag::ext_duplicate_declspec, PrevSpec); + } + ConsumeToken(); + } +} + + +/// ParseDeclarator - Parse and verify a newly-initialized declarator. +/// +void Parser::ParseDeclarator(Declarator &D) { + /// This implements the 'declarator' production in the C grammar, then checks + /// for well-formedness and issues diagnostics. + ParseDeclaratorInternal(D); + + // TODO: validate D. + +} + +/// ParseDeclaratorInternal +/// declarator: [C99 6.7.5] +/// pointer[opt] direct-declarator +/// [C++] '&' declarator [C++ 8p4, dcl.decl] +/// [GNU] '&' restrict[opt] attributes[opt] declarator +/// +/// pointer: [C99 6.7.5] +/// '*' type-qualifier-list[opt] +/// '*' type-qualifier-list[opt] pointer +/// +void Parser::ParseDeclaratorInternal(Declarator &D) { + tok::TokenKind Kind = Tok.getKind(); + + // Not a pointer or C++ reference. + if (Kind != tok::star && !(Kind == tok::amp && getLang().CPlusPlus)) + return ParseDirectDeclarator(D); + + // Otherwise, '*' -> pointer or '&' -> reference. + SourceLocation Loc = ConsumeToken(); // Eat the * or &. + + if (Kind == tok::star) { + // Is a pointer + DeclSpec DS; + + ParseTypeQualifierListOpt(DS); + + // Recursively parse the declarator. 
+ ParseDeclaratorInternal(D); + + // Remember that we parsed a pointer type, and remember the type-quals. + D.AddTypeInfo(DeclaratorChunk::getPointer(DS.getTypeQualifiers(), Loc)); + } else { + // Is a reference + DeclSpec DS; + + // C++ 8.3.2p1: cv-qualified references are ill-formed except when the + // cv-qualifiers are introduced through the use of a typedef or of a + // template type argument, in which case the cv-qualifiers are ignored. + // + // [GNU] Retricted references are allowed. + // [GNU] Attributes on references are allowed. + ParseTypeQualifierListOpt(DS); + + if (DS.getTypeQualifiers() != DeclSpec::TQ_unspecified) { + if (DS.getTypeQualifiers() & DeclSpec::TQ_const) + Diag(DS.getConstSpecLoc(), + diag::err_invalid_reference_qualifier_application, + "const"); + if (DS.getTypeQualifiers() & DeclSpec::TQ_volatile) + Diag(DS.getVolatileSpecLoc(), + diag::err_invalid_reference_qualifier_application, + "volatile"); + } + + // Recursively parse the declarator. + ParseDeclaratorInternal(D); + + // Remember that we parsed a reference type. It doesn't have type-quals. + D.AddTypeInfo(DeclaratorChunk::getReference(DS.getTypeQualifiers(), Loc)); + } +} + +/// ParseDirectDeclarator +/// direct-declarator: [C99 6.7.5] +/// identifier +/// '(' declarator ')' +/// [GNU] '(' attributes declarator ')' +/// [C90] direct-declarator '[' constant-expression[opt] ']' +/// [C99] direct-declarator '[' type-qual-list[opt] assignment-expr[opt] ']' +/// [C99] direct-declarator '[' 'static' type-qual-list[opt] assign-expr ']' +/// [C99] direct-declarator '[' type-qual-list 'static' assignment-expr ']' +/// [C99] direct-declarator '[' type-qual-list[opt] '*' ']' +/// direct-declarator '(' parameter-type-list ')' +/// direct-declarator '(' identifier-list[opt] ')' +/// [GNU] direct-declarator '(' parameter-forward-declarations +/// parameter-type-list[opt] ')' +/// +void Parser::ParseDirectDeclarator(Declarator &D) { + // Parse the first direct-declarator seen. 
+ if (Tok.getKind() == tok::identifier && D.mayHaveIdentifier()) { + assert(Tok.getIdentifierInfo() && "Not an identifier?"); + D.SetIdentifier(Tok.getIdentifierInfo(), Tok.getLocation()); + ConsumeToken(); + } else if (Tok.getKind() == tok::l_paren) { + // direct-declarator: '(' declarator ')' + // direct-declarator: '(' attributes declarator ')' + // Example: 'char (*X)' or 'int (*XX)(void)' + ParseParenDeclarator(D); + } else if (D.mayOmitIdentifier()) { + // This could be something simple like "int" (in which case the declarator + // portion is empty), if an abstract-declarator is allowed. + D.SetIdentifier(0, Tok.getLocation()); + } else { + // Expected identifier or '('. + Diag(Tok, diag::err_expected_ident_lparen); + D.SetIdentifier(0, Tok.getLocation()); + } + + assert(D.isPastIdentifier() && + "Haven't past the location of the identifier yet?"); + + while (1) { + if (Tok.getKind() == tok::l_paren) { + ParseParenDeclarator(D); + } else if (Tok.getKind() == tok::l_square) { + ParseBracketDeclarator(D); + } else { + break; + } + } +} + +/// ParseParenDeclarator - We parsed the declarator D up to a paren. This may +/// either be before the identifier (in which case these are just grouping +/// parens for precedence) or it may be after the identifier, in which case +/// these are function arguments. +/// +/// This method also handles this portion of the grammar: +/// parameter-type-list: [C99 6.7.5] +/// parameter-list +/// parameter-list ',' '...' 
+/// +/// parameter-list: [C99 6.7.5] +/// parameter-declaration +/// parameter-list ',' parameter-declaration +/// +/// parameter-declaration: [C99 6.7.5] +/// declaration-specifiers declarator +/// [GNU] declaration-specifiers declarator attributes +/// declaration-specifiers abstract-declarator[opt] +/// [GNU] declaration-specifiers abstract-declarator[opt] attributes +/// +/// identifier-list: [C99 6.7.5] +/// identifier +/// identifier-list ',' identifier +/// +void Parser::ParseParenDeclarator(Declarator &D) { + SourceLocation StartLoc = ConsumeParen(); + + // If we haven't past the identifier yet (or where the identifier would be + // stored, if this is an abstract declarator), then this is probably just + // grouping parens. + if (!D.isPastIdentifier()) { + // Okay, this is probably a grouping paren. However, if this could be an + // abstract-declarator, then this could also be the start of function + // arguments (consider 'void()'). + bool isGrouping; + + if (!D.mayOmitIdentifier()) { + // If this can't be an abstract-declarator, this *must* be a grouping + // paren, because we haven't seen the identifier yet. + isGrouping = true; + } else if (Tok.getKind() == tok::r_paren || // 'int()' is a function. + isDeclarationSpecifier()) { // 'int(int)' is a function. + // This handles C99 6.7.5.3p11: in "typedef int X; void foo(X)", X is + // considered to be a type, not a K&R identifier-list. + isGrouping = false; + } else { + // Otherwise, this is a grouping paren, e.g. 'int (*X)' or 'int(X)'. + isGrouping = true; + } + + // If this is a grouping paren, handle: + // direct-declarator: '(' declarator ')' + // direct-declarator: '(' attributes declarator ')' + if (isGrouping) { + if (Tok.getKind() == tok::kw___attribute) + D.AddAttributes(ParseAttributes()); + + ParseDeclaratorInternal(D); + // Match the ')'. 
+ MatchRHSPunctuation(tok::r_paren, StartLoc); + return; + } + + // Okay, if this wasn't a grouping paren, it must be the start of a function + // argument list. Recognize that this declarator will never have an + // identifier (and remember where it would have been), then fall through to + // the handling of argument lists. + D.SetIdentifier(0, Tok.getLocation()); + } + + // Okay, this is the parameter list of a function definition, or it is an + // identifier list of a K&R-style function. + bool IsVariadic; + bool HasPrototype; + bool ErrorEmitted = false; + + // Build up an array of information about the parsed arguments. + llvm::SmallVector<DeclaratorChunk::ParamInfo, 16> ParamInfo; + llvm::SmallSet<const IdentifierInfo*, 16> ParamsSoFar; + + if (Tok.getKind() == tok::r_paren) { + // int() -> no prototype, no '...'. + IsVariadic = false; + HasPrototype = false; + } else if (Tok.getKind() == tok::identifier && + // K&R identifier lists can't have typedefs as identifiers, per + // C99 6.7.5.3p11. + !Actions.isTypeName(*Tok.getIdentifierInfo(), CurScope)) { + // Identifier list. Note that '(' identifier-list ')' is only allowed for + // normal declarators, not for abstract-declarators. + assert(D.isPastIdentifier() && "Identifier (if present) must be passed!"); + + // If there was no identifier specified, either we are in an + // abstract-declarator, or we are in a parameter declarator which was found + // to be abstract. In abstract-declarators, identifier lists are not valid, + // diagnose this. + if (!D.getIdentifier()) + Diag(Tok, diag::ext_ident_list_in_param); + + // Remember this identifier in ParamInfo. + ParamInfo.push_back(DeclaratorChunk::ParamInfo(Tok.getIdentifierInfo(), + Tok.getLocation(), 0)); + + ConsumeToken(); + while (Tok.getKind() == tok::comma) { + // Eat the comma. 
+ ConsumeToken(); + + if (Tok.getKind() != tok::identifier) { + Diag(Tok, diag::err_expected_ident); + ErrorEmitted = true; + break; + } + + IdentifierInfo *ParmII = Tok.getIdentifierInfo(); + + // Verify that the argument identifier has not already been mentioned. + if (!ParamsSoFar.insert(ParmII)) { + Diag(Tok.getLocation(), diag::err_param_redefinition,ParmII->getName()); + ParmII = 0; + } + + // Remember this identifier in ParamInfo. + if (ParmII) + ParamInfo.push_back(DeclaratorChunk::ParamInfo(ParmII, + Tok.getLocation(), 0)); + + // Eat the identifier. + ConsumeToken(); + } + + // K&R 'prototype'. + IsVariadic = false; + HasPrototype = false; + } else { + // Finally, a normal, non-empty parameter type list. + + // Enter function-declaration scope, limiting any declarators for struct + // tags to the function prototype scope. + // FIXME: is this needed? + EnterScope(0); + + IsVariadic = false; + while (1) { + if (Tok.getKind() == tok::ellipsis) { + IsVariadic = true; + + // Check to see if this is "void(...)" which is not allowed. + if (ParamInfo.empty()) { + // Otherwise, parse parameter type list. If it starts with an + // ellipsis, diagnose the malformed function. + Diag(Tok, diag::err_ellipsis_first_arg); + IsVariadic = false; // Treat this like 'void()'. + } + + // Consume the ellipsis. + ConsumeToken(); + break; + } + + // Parse the declaration-specifiers. + DeclSpec DS; + ParseDeclarationSpecifiers(DS); + + // Parse the declarator. This is "PrototypeContext", because we must + // accept either 'declarator' or 'abstract-declarator' here. + Declarator ParmDecl(DS, Declarator::PrototypeContext); + ParseDeclarator(ParmDecl); + + // Parse GNU attributes, if present. + if (Tok.getKind() == tok::kw___attribute) + ParmDecl.AddAttributes(ParseAttributes()); + + // Verify C99 6.7.5.3p2: The only SCS allowed is 'register'. + // NOTE: we could trivially allow 'int foo(auto int X)' if we wanted. 
+ if (DS.getStorageClassSpec() != DeclSpec::SCS_unspecified && + DS.getStorageClassSpec() != DeclSpec::SCS_register) { + Diag(DS.getStorageClassSpecLoc(), + diag::err_invalid_storage_class_in_func_decl); + DS.ClearStorageClassSpecs(); + } + if (DS.isThreadSpecified()) { + Diag(DS.getThreadSpecLoc(), + diag::err_invalid_storage_class_in_func_decl); + DS.ClearStorageClassSpecs(); + } + + // Inform the actions module about the parameter declarator, so it gets + // added to the current scope. + Action::TypeResult ParamTy = + Actions.ParseParamDeclaratorType(CurScope, ParmDecl); + + // Remember this parsed parameter in ParamInfo. + IdentifierInfo *ParmII = ParmDecl.getIdentifier(); + + // Verify that the argument identifier has not already been mentioned. + if (ParmII && !ParamsSoFar.insert(ParmII)) { + Diag(ParmDecl.getIdentifierLoc(), diag::err_param_redefinition, + ParmII->getName()); + ParmII = 0; + } + + ParamInfo.push_back(DeclaratorChunk::ParamInfo(ParmII, + ParmDecl.getIdentifierLoc(), + ParamTy.Val)); + + // If the next token is a comma, consume it and keep reading arguments. + if (Tok.getKind() != tok::comma) break; + + // Consume the comma. + ConsumeToken(); + } + + HasPrototype = true; + + // Leave prototype scope. + ExitScope(); + } + + // Remember that we parsed a function type, and remember the attributes. + if (!ErrorEmitted) + D.AddTypeInfo(DeclaratorChunk::getFunction(HasPrototype, IsVariadic, + &ParamInfo[0], ParamInfo.size(), + StartLoc)); + + // If we have the closing ')', eat it and we're done. + if (Tok.getKind() == tok::r_paren) { + ConsumeParen(); + } else { + // If an error happened earlier parsing something else in the proto, don't + // issue another error. 
+ if (!ErrorEmitted) + Diag(Tok, diag::err_expected_rparen); + SkipUntil(tok::r_paren); + } +} + + +/// [C90] direct-declarator '[' constant-expression[opt] ']' +/// [C99] direct-declarator '[' type-qual-list[opt] assignment-expr[opt] ']' +/// [C99] direct-declarator '[' 'static' type-qual-list[opt] assign-expr ']' +/// [C99] direct-declarator '[' type-qual-list 'static' assignment-expr ']' +/// [C99] direct-declarator '[' type-qual-list[opt] '*' ']' +void Parser::ParseBracketDeclarator(Declarator &D) { + SourceLocation StartLoc = ConsumeBracket(); + + // If valid, this location is the position where we read the 'static' keyword. + SourceLocation StaticLoc; + if (Tok.getKind() == tok::kw_static) + StaticLoc = ConsumeToken(); + + // If there is a type-qualifier-list, read it now. + DeclSpec DS; + ParseTypeQualifierListOpt(DS); + + // If we haven't already read 'static', check to see if there is one after the + // type-qualifier-list. + if (!StaticLoc.isValid() && Tok.getKind() == tok::kw_static) + StaticLoc = ConsumeToken(); + + // Handle "direct-declarator [ type-qual-list[opt] * ]". + bool isStar = false; + ExprResult NumElements(false); + if (Tok.getKind() == tok::star) { + // Remember the '*' token, in case we have to un-get it. + LexerToken StarTok = Tok; + ConsumeToken(); + + // Check that the ']' token is present to avoid incorrectly parsing + // expressions starting with '*' as [*]. + if (Tok.getKind() == tok::r_square) { + if (StaticLoc.isValid()) + Diag(StaticLoc, diag::err_unspecified_vla_size_with_static); + StaticLoc = SourceLocation(); // Drop the static. + isStar = true; + } else { + // Otherwise, the * must have been some expression (such as '*ptr') that + // started an assignment-expr. We already consumed the token, but now we + // need to reparse it. This handles cases like 'X[*p + 4]' + NumElements = ParseAssignmentExpressionWithLeadingStar(StarTok); + } + } else if (Tok.getKind() != tok::r_square) { + // Parse the assignment-expression now. 
+ NumElements = ParseAssignmentExpression(); + } + + // If there was an error parsing the assignment-expression, recover. + if (NumElements.isInvalid) { + // If the expression was invalid, skip it. + SkipUntil(tok::r_square); + return; + } + + MatchRHSPunctuation(tok::r_square, StartLoc); + + // If C99 isn't enabled, emit an ext-warn if the arg list wasn't empty and if + // it was not a constant expression. + if (!getLang().C99) { + // TODO: check C90 array constant exprness. + if (isStar || StaticLoc.isValid() || + 0/*TODO: NumElts is not a C90 constantexpr */) + Diag(StartLoc, diag::ext_c99_array_usage); + } + + // Remember that we parsed a pointer type, and remember the type-quals. + D.AddTypeInfo(DeclaratorChunk::getArray(DS.getTypeQualifiers(), + StaticLoc.isValid(), isStar, + NumElements.Val, StartLoc)); +} + diff --git a/Parse/ParseExpr.cpp b/Parse/ParseExpr.cpp new file mode 100644 index 0000000000..41f347f9aa --- /dev/null +++ b/Parse/ParseExpr.cpp @@ -0,0 +1,935 @@ +//===--- ParseExpr.cpp - Expression Parsing -------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Expression parsing implementation. Expressions in +// C99 basically consist of a bunch of binary operators with unary operators and +// other random stuff at the leaves. +// +// In the C99 grammar, these unary operators bind tightest and are represented +// as the 'cast-expression' production. Everything else is either a binary +// operator (e.g. '/') or a ternary operator ("?:"). The unary leaves are +// handled by ParseCastExpression, the higher level pieces are handled by +// ParseBinaryExpression. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/Parse/Parser.h" +#include "clang/Basic/Diagnostic.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SmallString.h" +using namespace clang; + +/// PrecedenceLevels - These are precedences for the binary/ternary operators in +/// the C99 grammar. These have been named to relate with the C99 grammar +/// productions. Low precedences numbers bind more weakly than high numbers. +namespace prec { + enum Level { + Unknown = 0, // Not binary operator. + Comma = 1, // , + Assignment = 2, // =, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |= + Conditional = 3, // ? + LogicalOr = 4, // || + LogicalAnd = 5, // && + InclusiveOr = 6, // | + ExclusiveOr = 7, // ^ + And = 8, // & + Equality = 9, // ==, != + Relational = 10, // >=, <=, >, < + Shift = 11, // <<, >> + Additive = 12, // -, + + Multiplicative = 13 // *, /, % + }; +} + + +/// getBinOpPrecedence - Return the precedence of the specified binary operator +/// token. 
This returns: +/// +static prec::Level getBinOpPrecedence(tok::TokenKind Kind) { + switch (Kind) { + default: return prec::Unknown; + case tok::comma: return prec::Comma; + case tok::equal: + case tok::starequal: + case tok::slashequal: + case tok::percentequal: + case tok::plusequal: + case tok::minusequal: + case tok::lesslessequal: + case tok::greatergreaterequal: + case tok::ampequal: + case tok::caretequal: + case tok::pipeequal: return prec::Assignment; + case tok::question: return prec::Conditional; + case tok::pipepipe: return prec::LogicalOr; + case tok::ampamp: return prec::LogicalAnd; + case tok::pipe: return prec::InclusiveOr; + case tok::caret: return prec::ExclusiveOr; + case tok::amp: return prec::And; + case tok::exclaimequal: + case tok::equalequal: return prec::Equality; + case tok::lessequal: + case tok::less: + case tok::greaterequal: + case tok::greater: return prec::Relational; + case tok::lessless: + case tok::greatergreater: return prec::Shift; + case tok::plus: + case tok::minus: return prec::Additive; + case tok::percent: + case tok::slash: + case tok::star: return prec::Multiplicative; + } +} + + +/// ParseExpression - Simple precedence-based parser for binary/ternary +/// operators. +/// +/// Note: we diverge from the C99 grammar when parsing the assignment-expression +/// production. C99 specifies that the LHS of an assignment operator should be +/// parsed as a unary-expression, but consistency dictates that it be a +/// conditional-expession. In practice, the important thing here is that the +/// LHS of an assignment has to be an l-value, which productions between +/// unary-expression and conditional-expression don't produce. Because we want +/// consistency, we parse the LHS as a conditional-expression, then check for +/// l-value-ness in semantic analysis stages. 
+/// +/// multiplicative-expression: [C99 6.5.5] +/// cast-expression +/// multiplicative-expression '*' cast-expression +/// multiplicative-expression '/' cast-expression +/// multiplicative-expression '%' cast-expression +/// +/// additive-expression: [C99 6.5.6] +/// multiplicative-expression +/// additive-expression '+' multiplicative-expression +/// additive-expression '-' multiplicative-expression +/// +/// shift-expression: [C99 6.5.7] +/// additive-expression +/// shift-expression '<<' additive-expression +/// shift-expression '>>' additive-expression +/// +/// relational-expression: [C99 6.5.8] +/// shift-expression +/// relational-expression '<' shift-expression +/// relational-expression '>' shift-expression +/// relational-expression '<=' shift-expression +/// relational-expression '>=' shift-expression +/// +/// equality-expression: [C99 6.5.9] +/// relational-expression +/// equality-expression '==' relational-expression +/// equality-expression '!=' relational-expression +/// +/// AND-expression: [C99 6.5.10] +/// equality-expression +/// AND-expression '&' equality-expression +/// +/// exclusive-OR-expression: [C99 6.5.11] +/// AND-expression +/// exclusive-OR-expression '^' AND-expression +/// +/// inclusive-OR-expression: [C99 6.5.12] +/// exclusive-OR-expression +/// inclusive-OR-expression '|' exclusive-OR-expression +/// +/// logical-AND-expression: [C99 6.5.13] +/// inclusive-OR-expression +/// logical-AND-expression '&&' inclusive-OR-expression +/// +/// logical-OR-expression: [C99 6.5.14] +/// logical-AND-expression +/// logical-OR-expression '||' logical-AND-expression +/// +/// conditional-expression: [C99 6.5.15] +/// logical-OR-expression +/// logical-OR-expression '?' expression ':' conditional-expression +/// [GNU] logical-OR-expression '?' 
':' conditional-expression +/// +/// assignment-expression: [C99 6.5.16] +/// conditional-expression +/// unary-expression assignment-operator assignment-expression +/// +/// assignment-operator: one of +/// = *= /= %= += -= <<= >>= &= ^= |= +/// +/// expression: [C99 6.5.17] +/// assignment-expression +/// expression ',' assignment-expression +/// +Parser::ExprResult Parser::ParseExpression() { + ExprResult LHS = ParseCastExpression(false); + if (LHS.isInvalid) return LHS; + + return ParseRHSOfBinaryExpression(LHS, prec::Comma); +} + +/// ParseAssignmentExpression - Parse an expr that doesn't include commas. +/// +Parser::ExprResult Parser::ParseAssignmentExpression() { + ExprResult LHS = ParseCastExpression(false); + if (LHS.isInvalid) return LHS; + + return ParseRHSOfBinaryExpression(LHS, prec::Assignment); +} + +Parser::ExprResult Parser::ParseConstantExpression() { + ExprResult LHS = ParseCastExpression(false); + if (LHS.isInvalid) return LHS; + + // TODO: Validate that this is a constant expr! + return ParseRHSOfBinaryExpression(LHS, prec::Conditional); +} + +/// ParseExpressionWithLeadingIdentifier - This special purpose method is used +/// in contexts where we have already consumed an identifier (which we saved in +/// 'IdTok'), then discovered that the identifier was really the leading token +/// of part of an expression. For example, in "A[1]+B", we consumed "A" (which +/// is now in 'IdTok') and the current token is "[". +Parser::ExprResult Parser:: +ParseExpressionWithLeadingIdentifier(const LexerToken &IdTok) { + // We know that 'IdTok' must correspond to this production: + // primary-expression: identifier + + // Let the actions module handle the identifier. 
+ ExprResult Res = Actions.ParseIdentifierExpr(CurScope, IdTok.getLocation(), + *IdTok.getIdentifierInfo(), + Tok.getKind() == tok::l_paren); + + // Because we have to parse an entire cast-expression before starting the + // ParseRHSOfBinaryExpression method (which parses any trailing binops), we + // need to handle the 'postfix-expression' rules. We do this by invoking + // ParsePostfixExpressionSuffix to consume any postfix-expression suffixes: + Res = ParsePostfixExpressionSuffix(Res); + if (Res.isInvalid) return Res; + + // At this point, the "A[1]" part of "A[1]+B" has been consumed. Once this is + // done, we know we don't have to do anything for cast-expression, because the + // only non-postfix-expression production starts with a '(' token, and we know + // we have an identifier. As such, we can invoke ParseRHSOfBinaryExpression + // to consume any trailing operators (e.g. "+" in this example) and connected + // chunks of the expression. + return ParseRHSOfBinaryExpression(Res, prec::Comma); +} + +/// ParseExpressionWithLeadingIdentifier - This special purpose method is used +/// in contexts where we have already consumed an identifier (which we saved in +/// 'IdTok'), then discovered that the identifier was really the leading token +/// of part of an assignment-expression. For example, in "A[1]+B", we consumed +/// "A" (which is now in 'IdTok') and the current token is "[". +Parser::ExprResult Parser:: +ParseAssignmentExprWithLeadingIdentifier(const LexerToken &IdTok) { + // We know that 'IdTok' must correspond to this production: + // primary-expression: identifier + + // Let the actions module handle the identifier. 
+ ExprResult Res = Actions.ParseIdentifierExpr(CurScope, IdTok.getLocation(), + *IdTok.getIdentifierInfo(), + Tok.getKind() == tok::l_paren); + + // Because we have to parse an entire cast-expression before starting the + // ParseRHSOfBinaryExpression method (which parses any trailing binops), we + // need to handle the 'postfix-expression' rules. We do this by invoking + // ParsePostfixExpressionSuffix to consume any postfix-expression suffixes: + Res = ParsePostfixExpressionSuffix(Res); + if (Res.isInvalid) return Res; + + // At this point, the "A[1]" part of "A[1]+B" has been consumed. Once this is + // done, we know we don't have to do anything for cast-expression, because the + // only non-postfix-expression production starts with a '(' token, and we know + // we have an identifier. As such, we can invoke ParseRHSOfBinaryExpression + // to consume any trailing operators (e.g. "+" in this example) and connected + // chunks of the expression. + return ParseRHSOfBinaryExpression(Res, prec::Assignment); +} + + +/// ParseAssignmentExpressionWithLeadingStar - This special purpose method is +/// used in contexts where we have already consumed a '*' (which we saved in +/// 'StarTok'), then discovered that the '*' was really the leading token of an +/// expression. For example, in "*(int*)P+B", we consumed "*" (which is +/// now in 'StarTok') and the current token is "(". +Parser::ExprResult Parser:: +ParseAssignmentExpressionWithLeadingStar(const LexerToken &StarTok) { + // We know that 'StarTok' must correspond to this production: + // unary-expression: unary-operator cast-expression + // where 'unary-operator' is '*'. + + // Parse the cast-expression that follows the '*'. This will parse the + // "*(int*)P" part of "*(int*)P+B". + ExprResult Res = ParseCastExpression(false); + if (Res.isInvalid) return Res; + + // Combine StarTok + Res to get the new AST for the combined expression.. 
+ Res = Actions.ParseUnaryOp(StarTok.getLocation(), tok::star, Res.Val); + if (Res.isInvalid) return Res; + + + // We have to parse an entire cast-expression before starting the + // ParseRHSOfBinaryExpression method (which parses any trailing binops). Since + // we know that the only production above us is the cast-expression + // production, and because the only alternative productions start with a '(' + // token (we know we had a '*'), there is no work to do to get a whole + // cast-expression. + + // At this point, the "*(int*)P" part of "*(int*)P+B" has been consumed. Once + // this is done, we can invoke ParseRHSOfBinaryExpression to consume any + // trailing operators (e.g. "+" in this example) and connected chunks of the + // assignment-expression. + return ParseRHSOfBinaryExpression(Res, prec::Assignment); +} + + +/// ParseRHSOfBinaryExpression - Parse a binary expression that starts with +/// LHS and has a precedence of at least MinPrec. +Parser::ExprResult +Parser::ParseRHSOfBinaryExpression(ExprResult LHS, unsigned MinPrec) { + unsigned NextTokPrec = getBinOpPrecedence(Tok.getKind()); + SourceLocation ColonLoc; + + while (1) { + // If this token has a lower precedence than we are allowed to parse (e.g. + // because we are called recursively, or because the token is not a binop), + // then we are done! + if (NextTokPrec < MinPrec) + return LHS; + + // Consume the operator, saving the operator token for error reporting. + LexerToken OpToken = Tok; + ConsumeToken(); + + // Special case handling for the ternary operator. + ExprResult TernaryMiddle(true); + if (NextTokPrec == prec::Conditional) { + if (Tok.getKind() != tok::colon) { + // Handle this production specially: + // logical-OR-expression '?' expression ':' conditional-expression + // In particular, the RHS of the '?' is 'expression', not + // 'logical-OR-expression' as we might expect. 
+ TernaryMiddle = ParseExpression(); + if (TernaryMiddle.isInvalid) return TernaryMiddle; + } else { + // Special case handling of "X ? Y : Z" where Y is empty: + // logical-OR-expression '?' ':' conditional-expression [GNU] + TernaryMiddle = ExprResult(false); + Diag(Tok, diag::ext_gnu_conditional_expr); + } + + if (Tok.getKind() != tok::colon) { + Diag(Tok, diag::err_expected_colon); + Diag(OpToken, diag::err_matching, "?"); + return ExprResult(true); + } + + // Eat the colon. + ColonLoc = ConsumeToken(); + } + + // Parse another leaf here for the RHS of the operator. + ExprResult RHS = ParseCastExpression(false); + if (RHS.isInvalid) return RHS; + + // Remember the precedence of this operator and get the precedence of the + // operator immediately to the right of the RHS. + unsigned ThisPrec = NextTokPrec; + NextTokPrec = getBinOpPrecedence(Tok.getKind()); + + // Assignment and conditional expressions are right-associative. + bool isRightAssoc = NextTokPrec == prec::Conditional || + NextTokPrec == prec::Assignment; + + // Get the precedence of the operator to the right of the RHS. If it binds + // more tightly with RHS than we do, evaluate it completely first. + if (ThisPrec < NextTokPrec || + (ThisPrec == NextTokPrec && isRightAssoc)) { + // If this is left-associative, only parse things on the RHS that bind + // more tightly than the current operator. If it is left-associative, it + // is okay, to bind exactly as tightly. For example, compile A=B=C=D as + // A=(B=(C=D)), where each paren is a level of recursion here. + RHS = ParseRHSOfBinaryExpression(RHS, ThisPrec + !isRightAssoc); + if (RHS.isInvalid) return RHS; + + NextTokPrec = getBinOpPrecedence(Tok.getKind()); + } + assert(NextTokPrec <= ThisPrec && "Recursion didn't work!"); + + // Combine the LHS and RHS into the LHS (e.g. build AST). 
+ if (TernaryMiddle.isInvalid) + LHS = Actions.ParseBinOp(OpToken.getLocation(), OpToken.getKind(), + LHS.Val, RHS.Val); + else + LHS = Actions.ParseConditionalOp(OpToken.getLocation(), ColonLoc, + LHS.Val, TernaryMiddle.Val, RHS.Val); + } +} + +/// ParseCastExpression - Parse a cast-expression, or, if isUnaryExpression is +/// true, parse a unary-expression. +/// +/// cast-expression: [C99 6.5.4] +/// unary-expression +/// '(' type-name ')' cast-expression +/// +/// unary-expression: [C99 6.5.3] +/// postfix-expression +/// '++' unary-expression +/// '--' unary-expression +/// unary-operator cast-expression +/// 'sizeof' unary-expression +/// 'sizeof' '(' type-name ')' +/// [GNU] '__alignof' unary-expression +/// [GNU] '__alignof' '(' type-name ')' +/// [GNU] '&&' identifier +/// +/// unary-operator: one of +/// '&' '*' '+' '-' '~' '!' +/// [GNU] '__extension__' '__real' '__imag' +/// +/// primary-expression: [C99 6.5.1] +/// identifier +/// constant +/// string-literal +/// [C++] boolean-literal [C++ 2.13.5] +/// '(' expression ')' +/// '__func__' [C99 6.4.2.2] +/// [GNU] '__FUNCTION__' +/// [GNU] '__PRETTY_FUNCTION__' +/// [GNU] '(' compound-statement ')' +/// [GNU] '__builtin_va_arg' '(' assignment-expression ',' type-name ')' +/// [GNU] '__builtin_offsetof' '(' type-name ',' offsetof-member-designator')' +/// [GNU] '__builtin_choose_expr' '(' assign-expr ',' assign-expr ',' +/// assign-expr ')' +/// [GNU] '__builtin_types_compatible_p' '(' type-name ',' type-name ')' +/// [OBC] '[' objc-receiver objc-message-args ']' [TODO] +/// [OBC] '@selector' '(' objc-selector-arg ')' [TODO] +/// [OBC] '@protocol' '(' identifier ')' [TODO] +/// [OBC] '@encode' '(' type-name ')' [TODO] +/// [OBC] objc-string-literal [TODO] +/// [C++] 'const_cast' '<' type-name '>' '(' expression ')' [C++ 5.2p1] +/// [C++] 'dynamic_cast' '<' type-name '>' '(' expression ')' [C++ 5.2p1] +/// [C++] 'reinterpret_cast' '<' type-name '>' '(' expression ')' [C++ 5.2p1] +/// [C++] 'static_cast' '<' 
type-name '>' '(' expression ')' [C++ 5.2p1] +/// +/// constant: [C99 6.4.4] +/// integer-constant +/// floating-constant +/// enumeration-constant -> identifier +/// character-constant +/// +Parser::ExprResult Parser::ParseCastExpression(bool isUnaryExpression) { + ExprResult Res; + tok::TokenKind SavedKind = Tok.getKind(); + + // This handles all of cast-expression, unary-expression, postfix-expression, + // and primary-expression. We handle them together like this for efficiency + // and to simplify handling of an expression starting with a '(' token: which + // may be one of a parenthesized expression, cast-expression, compound literal + // expression, or statement expression. + // + // If the parsed tokens consist of a primary-expression, the cases below + // call ParsePostfixExpressionSuffix to handle the postfix expression + // suffixes. Cases that cannot be followed by postfix exprs should + // return without invoking ParsePostfixExpressionSuffix. + switch (SavedKind) { + case tok::l_paren: { + // If this expression is limited to being a unary-expression, the parent can + // not start a cast expression. + ParenParseOption ParenExprType = + isUnaryExpression ? CompoundLiteral : CastExpr; + TypeTy *CastTy; + SourceLocation LParenLoc = Tok.getLocation(); + SourceLocation RParenLoc; + Res = ParseParenExpression(ParenExprType, CastTy, RParenLoc); + if (Res.isInvalid) return Res; + + switch (ParenExprType) { + case SimpleExpr: break; // Nothing else to do. + case CompoundStmt: break; // Nothing else to do. + case CompoundLiteral: + // We parsed '(' type-name ')' '{' ... '}'. If any suffixes of + // postfix-expression exist, parse them now. + break; + case CastExpr: + // We parsed '(' type-name ')' and the thing after it wasn't a '{'. Parse + // the cast-expression that follows it next. + // TODO: For cast expression with CastTy. 
+ Res = ParseCastExpression(false); + if (!Res.isInvalid) + Res = Actions.ParseCastExpr(LParenLoc, CastTy, RParenLoc, Res.Val); + return Res; + } + + // These can be followed by postfix-expr pieces. + return ParsePostfixExpressionSuffix(Res); + } + + // primary-expression + case tok::numeric_constant: + // constant: integer-constant + // constant: floating-constant + + Res = Actions.ParseNumericConstant(Tok); + ConsumeToken(); + + // These can be followed by postfix-expr pieces. + return ParsePostfixExpressionSuffix(Res); + + case tok::kw_true: + case tok::kw_false: + return ParseCXXBoolLiteral(); + + case tok::identifier: { // primary-expression: identifier + // constant: enumeration-constant + // Consume the identifier so that we can see if it is followed by a '('. + // Function designators are allowed to be undeclared (C99 6.5.1p2), so we + // need to know whether or not this identifier is a function designator or + // not. + IdentifierInfo &II = *Tok.getIdentifierInfo(); + SourceLocation L = ConsumeToken(); + Res = Actions.ParseIdentifierExpr(CurScope, L, II, + Tok.getKind() == tok::l_paren); + // These can be followed by postfix-expr pieces. + return ParsePostfixExpressionSuffix(Res); + } + case tok::char_constant: // constant: character-constant + Res = Actions.ParseCharacterConstant(Tok); + ConsumeToken(); + // These can be followed by postfix-expr pieces. + return ParsePostfixExpressionSuffix(Res); + case tok::kw___func__: // primary-expression: __func__ [C99 6.4.2.2] + case tok::kw___FUNCTION__: // primary-expression: __FUNCTION__ [GNU] + case tok::kw___PRETTY_FUNCTION__: // primary-expression: __P..Y_F..N__ [GNU] + Res = Actions.ParseSimplePrimaryExpr(Tok.getLocation(), SavedKind); + ConsumeToken(); + // These can be followed by postfix-expr pieces. 
+ return ParsePostfixExpressionSuffix(Res); + case tok::string_literal: // primary-expression: string-literal + case tok::wide_string_literal: + Res = ParseStringLiteralExpression(); + if (Res.isInvalid) return Res; + // This can be followed by postfix-expr pieces (e.g. "foo"[1]). + return ParsePostfixExpressionSuffix(Res); + case tok::kw___builtin_va_arg: + case tok::kw___builtin_offsetof: + case tok::kw___builtin_choose_expr: + case tok::kw___builtin_types_compatible_p: + return ParseBuiltinPrimaryExpression(); + case tok::plusplus: // unary-expression: '++' unary-expression + case tok::minusminus: { // unary-expression: '--' unary-expression + SourceLocation SavedLoc = ConsumeToken(); + Res = ParseCastExpression(true); + if (!Res.isInvalid) + Res = Actions.ParseUnaryOp(SavedLoc, SavedKind, Res.Val); + return Res; + } + case tok::amp: // unary-expression: '&' cast-expression + case tok::star: // unary-expression: '*' cast-expression + case tok::plus: // unary-expression: '+' cast-expression + case tok::minus: // unary-expression: '-' cast-expression + case tok::tilde: // unary-expression: '~' cast-expression + case tok::exclaim: // unary-expression: '!' cast-expression + case tok::kw___real: // unary-expression: '__real' cast-expression [GNU] + case tok::kw___imag: // unary-expression: '__imag' cast-expression [GNU] + case tok::kw___extension__:{//unary-expression:'__extension__' cast-expr [GNU] + // FIXME: Extension not handled correctly here! 
+ SourceLocation SavedLoc = ConsumeToken(); + Res = ParseCastExpression(false); + if (!Res.isInvalid) + Res = Actions.ParseUnaryOp(SavedLoc, SavedKind, Res.Val); + return Res; + } + case tok::kw_sizeof: // unary-expression: 'sizeof' unary-expression + // unary-expression: 'sizeof' '(' type-name ')' + case tok::kw___alignof: // unary-expression: '__alignof' unary-expression + // unary-expression: '__alignof' '(' type-name ')' + return ParseSizeofAlignofExpression(); + case tok::ampamp: { // unary-expression: '&&' identifier + SourceLocation AmpAmpLoc = ConsumeToken(); + if (Tok.getKind() != tok::identifier) { + Diag(Tok, diag::err_expected_ident); + return ExprResult(true); + } + + Diag(AmpAmpLoc, diag::ext_gnu_address_of_label); + Res = Actions.ParseAddrLabel(AmpAmpLoc, Tok.getLocation(), + Tok.getIdentifierInfo()); + ConsumeToken(); + return Res; + } + case tok::kw_const_cast: + case tok::kw_dynamic_cast: + case tok::kw_reinterpret_cast: + case tok::kw_static_cast: + return ParseCXXCasts(); + default: + Diag(Tok, diag::err_expected_expression); + return ExprResult(true); + } + + // unreachable. + abort(); +} + +/// ParsePostfixExpressionSuffix - Once the leading part of a postfix-expression +/// is parsed, this method parses any suffixes that apply. +/// +/// postfix-expression: [C99 6.5.2] +/// primary-expression +/// postfix-expression '[' expression ']' +/// postfix-expression '(' argument-expression-list[opt] ')' +/// postfix-expression '.' 
identifier +/// postfix-expression '->' identifier +/// postfix-expression '++' +/// postfix-expression '--' +/// '(' type-name ')' '{' initializer-list '}' +/// '(' type-name ')' '{' initializer-list ',' '}' +/// +/// argument-expression-list: [C99 6.5.2] +/// argument-expression +/// argument-expression-list ',' assignment-expression +/// +Parser::ExprResult Parser::ParsePostfixExpressionSuffix(ExprResult LHS) { + + // Now that the primary-expression piece of the postfix-expression has been + // parsed, see if there are any postfix-expression pieces here. + SourceLocation Loc; + while (1) { + switch (Tok.getKind()) { + default: // Not a postfix-expression suffix. + return LHS; + case tok::l_square: { // postfix-expression: p-e '[' expression ']' + Loc = ConsumeBracket(); + ExprResult Idx = ParseExpression(); + + SourceLocation RLoc = Tok.getLocation(); + + if (!LHS.isInvalid && !Idx.isInvalid && Tok.getKind() == tok::r_square) + LHS = Actions.ParseArraySubscriptExpr(LHS.Val, Loc, Idx.Val, RLoc); + else + LHS = ExprResult(true); + + // Match the ']'. + MatchRHSPunctuation(tok::r_square, Loc); + break; + } + + case tok::l_paren: { // p-e: p-e '(' argument-expression-list[opt] ')' + llvm::SmallVector<ExprTy*, 8> ArgExprs; + llvm::SmallVector<SourceLocation, 8> CommaLocs; + bool ArgExprsOk = true; + + Loc = ConsumeParen(); + + if (Tok.getKind() != tok::r_paren) { + while (1) { + ExprResult ArgExpr = ParseAssignmentExpression(); + if (ArgExpr.isInvalid) { + ArgExprsOk = false; + SkipUntil(tok::r_paren); + break; + } else + ArgExprs.push_back(ArgExpr.Val); + + if (Tok.getKind() != tok::comma) + break; + // Move to the next argument, remember where the comma was. + CommaLocs.push_back(ConsumeToken()); + } + } + + // Match the ')'. 
+ if (!LHS.isInvalid && ArgExprsOk && Tok.getKind() == tok::r_paren) { + assert((ArgExprs.size() == 0 || ArgExprs.size()-1 == CommaLocs.size())&& + "Unexpected number of commas!"); + LHS = Actions.ParseCallExpr(LHS.Val, Loc, &ArgExprs[0], ArgExprs.size(), + &CommaLocs[0], Tok.getLocation()); + } + + if (ArgExprsOk) + MatchRHSPunctuation(tok::r_paren, Loc); + break; + } + case tok::arrow: // postfix-expression: p-e '->' identifier + case tok::period: { // postfix-expression: p-e '.' identifier + tok::TokenKind OpKind = Tok.getKind(); + SourceLocation OpLoc = ConsumeToken(); // Eat the "." or "->" token. + + if (Tok.getKind() != tok::identifier) { + Diag(Tok, diag::err_expected_ident); + return ExprResult(true); + } + + if (!LHS.isInvalid) + LHS = Actions.ParseMemberReferenceExpr(LHS.Val, OpLoc, OpKind, + Tok.getLocation(), + *Tok.getIdentifierInfo()); + ConsumeToken(); + break; + } + case tok::plusplus: // postfix-expression: postfix-expression '++' + case tok::minusminus: // postfix-expression: postfix-expression '--' + if (!LHS.isInvalid) + LHS = Actions.ParsePostfixUnaryOp(Tok.getLocation(), Tok.getKind(), + LHS.Val); + ConsumeToken(); + break; + } + } +} + + +/// ParseSizeofAlignofExpression - Parse a sizeof or alignof expression. +/// unary-expression: [C99 6.5.3] +/// 'sizeof' unary-expression +/// 'sizeof' '(' type-name ')' +/// [GNU] '__alignof' unary-expression +/// [GNU] '__alignof' '(' type-name ')' +Parser::ExprResult Parser::ParseSizeofAlignofExpression() { + assert((Tok.getKind() == tok::kw_sizeof || + Tok.getKind() == tok::kw___alignof) && + "Not a sizeof/alignof expression!"); + LexerToken OpTok = Tok; + ConsumeToken(); + + // If the operand doesn't start with an '(', it must be an expression. 
+ ExprResult Operand; + if (Tok.getKind() != tok::l_paren) { + Operand = ParseCastExpression(true); + } else { + // If it starts with a '(', we know that it is either a parenthesized + // type-name, or it is a unary-expression that starts with a compound + // literal, or starts with a primary-expression that is a parenthesized + // expression. + ParenParseOption ExprType = CastExpr; + TypeTy *CastTy; + SourceLocation LParenLoc = Tok.getLocation(), RParenLoc; + Operand = ParseParenExpression(ExprType, CastTy, RParenLoc); + + // If ParseParenExpression parsed a '(typename)' sequence only, the this is + // sizeof/alignof a type. Otherwise, it is sizeof/alignof an expression. + if (ExprType == CastExpr) { + return Actions.ParseSizeOfAlignOfTypeExpr(OpTok.getLocation(), + OpTok.getKind() == tok::kw_sizeof, + LParenLoc, CastTy, RParenLoc); + } + } + + // If we get here, the operand to the sizeof/alignof was an expresion. + if (!Operand.isInvalid) + Operand = Actions.ParseUnaryOp(OpTok.getLocation(), OpTok.getKind(), + Operand.Val); + return Operand; +} + +/// ParseBuiltinPrimaryExpression +/// +/// primary-expression: [C99 6.5.1] +/// [GNU] '__builtin_va_arg' '(' assignment-expression ',' type-name ')' +/// [GNU] '__builtin_offsetof' '(' type-name ',' offsetof-member-designator')' +/// [GNU] '__builtin_choose_expr' '(' assign-expr ',' assign-expr ',' +/// assign-expr ')' +/// [GNU] '__builtin_types_compatible_p' '(' type-name ',' type-name ')' +/// +/// [GNU] offsetof-member-designator: +/// [GNU] identifier +/// [GNU] offsetof-member-designator '.' identifier +/// [GNU] offsetof-member-designator '[' expression ']' +/// +Parser::ExprResult Parser::ParseBuiltinPrimaryExpression() { + ExprResult Res(false); + const IdentifierInfo *BuiltinII = Tok.getIdentifierInfo(); + + tok::TokenKind T = Tok.getKind(); + SourceLocation StartLoc = ConsumeToken(); // Eat the builtin identifier. + + // All of these start with an open paren. 
+ if (Tok.getKind() != tok::l_paren) { + Diag(Tok, diag::err_expected_lparen_after, BuiltinII->getName()); + return ExprResult(true); + } + + SourceLocation LParenLoc = ConsumeParen(); + // TODO: Build AST. + + switch (T) { + default: assert(0 && "Not a builtin primary expression!"); + case tok::kw___builtin_va_arg: + Res = ParseAssignmentExpression(); + if (Res.isInvalid) { + SkipUntil(tok::r_paren); + return Res; + } + + if (ExpectAndConsume(tok::comma, diag::err_expected_comma, "",tok::r_paren)) + return ExprResult(true); + + ParseTypeName(); + break; + + case tok::kw___builtin_offsetof: + ParseTypeName(); + + if (ExpectAndConsume(tok::comma, diag::err_expected_comma, "",tok::r_paren)) + return ExprResult(true); + + // We must have at least one identifier here. + if (ExpectAndConsume(tok::identifier, diag::err_expected_ident, "", + tok::r_paren)) + return ExprResult(true); + + while (1) { + if (Tok.getKind() == tok::period) { + // offsetof-member-designator: offsetof-member-designator '.' 
identifier + ConsumeToken(); + + if (ExpectAndConsume(tok::identifier, diag::err_expected_ident, "", + tok::r_paren)) + return ExprResult(true); + } else if (Tok.getKind() == tok::l_square) { + // offsetof-member-designator: offsetof-member-design '[' expression ']' + SourceLocation LSquareLoc = ConsumeBracket(); + Res = ParseExpression(); + if (Res.isInvalid) { + SkipUntil(tok::r_paren); + return Res; + } + + MatchRHSPunctuation(tok::r_square, LSquareLoc); + } else { + break; + } + } + break; + case tok::kw___builtin_choose_expr: + Res = ParseAssignmentExpression(); + + if (ExpectAndConsume(tok::comma, diag::err_expected_comma, "",tok::r_paren)) + return ExprResult(true); + + Res = ParseAssignmentExpression(); + + if (ExpectAndConsume(tok::comma, diag::err_expected_comma, "",tok::r_paren)) + return ExprResult(true); + + Res = ParseAssignmentExpression(); + break; + case tok::kw___builtin_types_compatible_p: + ParseTypeName(); + + if (ExpectAndConsume(tok::comma, diag::err_expected_comma, "",tok::r_paren)) + return ExprResult(true); + + ParseTypeName(); + break; + } + + MatchRHSPunctuation(tok::r_paren, LParenLoc); + + // These can be followed by postfix-expr pieces because they are + // primary-expressions. + return ParsePostfixExpressionSuffix(Res); +} + +/// ParseParenExpression - This parses the unit that starts with a '(' token, +/// based on what is allowed by ExprType. The actual thing parsed is returned +/// in ExprType. 
+///
+///       primary-expression: [C99 6.5.1]
+///         '(' expression ')'
+/// [GNU]   '(' compound-statement ')'      (if !ParenExprOnly)
+///       postfix-expression: [C99 6.5.2]
+///         '(' type-name ')' '{' initializer-list '}'
+///         '(' type-name ')' '{' initializer-list ',' '}'
+///       cast-expression: [C99 6.5.4]
+///         '(' type-name ')' cast-expression
+///
+/// ExprType is an in/out parameter: on entry it bounds what may be parsed (the
+/// checks below are ExprType >= CompoundStmt, ExprType >= CompoundLiteral and
+/// ExprType == CastExpr), and on return it holds the kind actually parsed.
+/// NOTE(review): the >= comparisons presumably rely on the enum being ordered
+/// SimpleExpr < CompoundStmt < CompoundLiteral < CastExpr -- confirm against
+/// the ParenParseOption declaration.
+///
+/// CastTy is always reset to null and is only filled in when a cast is
+/// recognized; in that case the cast's operand is NOT parsed here.
+///
+/// RParenLoc is assigned only when the ')' is found where expected; the
+/// MatchRHSPunctuation / SkipUntil recovery paths leave it untouched.
+Parser::ExprResult Parser::ParseParenExpression(ParenParseOption &ExprType,
+                                                TypeTy *&CastTy,
+                                                SourceLocation &RParenLoc) {
+  assert(Tok.getKind() == tok::l_paren && "Not a paren expr!");
+  SourceLocation OpenLoc = ConsumeParen();
+  ExprResult Result(false);
+  CastTy = 0;
+
+  if (ExprType >= CompoundStmt && Tok.getKind() == tok::l_brace &&
+      !getLang().NoExtensions) {
+    // GNU statement expression: '(' compound-statement ')'.  The result of
+    // ParseCompoundStatement is currently dropped (see TODO below).
+    Diag(Tok, diag::ext_gnu_statement_expr);
+    ParseCompoundStatement();
+    ExprType = CompoundStmt;
+    // TODO: Build AST for GNU compound stmt.
+  } else if (ExprType >= CompoundLiteral && isTypeSpecifierQualifier()) {
+    // Otherwise, this is a compound literal expression or cast expression.
+    TypeTy *Ty = ParseTypeName();
+
+    // Match the ')'.
+    if (Tok.getKind() == tok::r_paren)
+      RParenLoc = ConsumeParen();
+    else
+      MatchRHSPunctuation(tok::r_paren, OpenLoc);
+
+    if (Tok.getKind() == tok::l_brace) {
+      if (!getLang().C99)   // Compound literals don't exist in C90.
+        Diag(OpenLoc, diag::ext_c99_compound_literal);
+      Result = ParseInitializer();
+      ExprType = CompoundLiteral;
+      // TODO: Build AST for compound literal.
+    } else if (ExprType == CastExpr) {
+      // Note that this doesn't parse the subsequent cast-expression, it just
+      // returns the parsed type to the caller.
+      ExprType = CastExpr;
+      CastTy = Ty;
+      return ExprResult(false);
+    } else {
+      // A parenthesized type-name that is neither followed by '{' nor allowed
+      // to be a cast.
+      Diag(Tok, diag::err_expected_lbrace_in_compound_literal);
+      return ExprResult(true);
+    }
+    // The ')' was already handled above, so skip the common tail.
+    return Result;
+  } else {
+    Result = ParseExpression();
+    ExprType = SimpleExpr;
+    // Only notify the actions when the closing paren is actually present; its
+    // location is read here and the token is consumed below.
+    if (!Result.isInvalid && Tok.getKind() == tok::r_paren)
+      Result = Actions.ParseParenExpr(OpenLoc, Tok.getLocation(), Result.Val);
+  }
+
+  // Match the ')'.
+  if (Result.isInvalid)
+    SkipUntil(tok::r_paren);
+  else {
+    if (Tok.getKind() == tok::r_paren)
+      RParenLoc = ConsumeParen();
+    else
+      MatchRHSPunctuation(tok::r_paren, OpenLoc);
+  }
+
+  return Result;
+}
+
+/// ParseStringLiteralExpression - This handles the various token types that
+/// form string literals, and also handles string concatenation [C99 5.1.1.2,
+/// translation phase #6].
+///
+///       primary-expression: [C99 6.5.1]
+///         string-literal
+///
+/// All adjacent string-literal tokens are collected and handed to
+/// Actions.ParseStringLiteral in one call.
+Parser::ExprResult Parser::ParseStringLiteralExpression() {
+  assert(isTokenStringLiteral() && "Not a string literal!");
+
+  // String concat.  Note that keywords like __func__ and __FUNCTION__ are not
+  // considered to be strings for concatenation purposes.
+  llvm::SmallVector<LexerToken, 4> StringToks;
+
+  // The assert above guarantees at least one iteration, so StringToks is
+  // non-empty when &StringToks[0] is taken below.
+  do {
+    StringToks.push_back(Tok);
+    ConsumeStringToken();
+  } while (isTokenStringLiteral());
+
+  // Pass the set of string tokens, ready for concatenation, to the actions.
+  return Actions.ParseStringLiteral(&StringToks[0], StringToks.size());
+}
diff --git a/Parse/ParseExprCXX.cpp b/Parse/ParseExprCXX.cpp
new file mode 100644
index 0000000000..7048660467
--- /dev/null
+++ b/Parse/ParseExprCXX.cpp
@@ -0,0 +1,78 @@
+//===--- ParseExprCXX.cpp - C++ Expression Parsing ------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Bill Wendling and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Expression parsing implementation for C++.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Parse/Parser.h"
+using namespace clang;
+
+/// ParseCXXCasts - This handles the various ways to cast expressions to another
+/// type.
+///
+///       postfix-expression: [C++ 5.2p1]
+///         'dynamic_cast' '<' type-name '>' '(' expression ')'
+///         'static_cast' '<' type-name '>' '(' expression ')'
+///         'reinterpret_cast' '<' type-name '>' '(' expression ')'
+///         'const_cast' '<' type-name '>' '(' expression ')'
+///
+Parser::ExprResult Parser::ParseCXXCasts() {
+  // Remember which cast keyword we are looking at before consuming it: the
+  // kind is handed to the action and its spelling is used in diagnostics.
+  const tok::TokenKind CastKind = Tok.getKind();
+  const char *Spelling = 0;
+  if (CastKind == tok::kw_const_cast) {
+    Spelling = "const_cast";
+  } else if (CastKind == tok::kw_dynamic_cast) {
+    Spelling = "dynamic_cast";
+  } else if (CastKind == tok::kw_reinterpret_cast) {
+    Spelling = "reinterpret_cast";
+  } else if (CastKind == tok::kw_static_cast) {
+    Spelling = "static_cast";
+  } else {
+    assert(0 && "Unknown C++ cast!");
+    abort();
+  }
+
+  // Eat the keyword; a '<' must come next.
+  SourceLocation KeywordLoc = ConsumeToken();
+  SourceLocation LessLoc = Tok.getLocation();
+  if (ExpectAndConsume(tok::less, diag::err_expected_less_after, Spelling))
+    return ExprResult(true);
+
+  // type-name '>'
+  TypeTy *TargetTy = ParseTypeName();
+  SourceLocation GreaterLoc = Tok.getLocation();
+  if (ExpectAndConsume(tok::greater, diag::err_expected_greater)) {
+    // Point back at the '<' that was never closed.
+    Diag(LessLoc, diag::err_matching, "<");
+    return ExprResult(true);
+  }
+
+  // '(' expression ')'
+  SourceLocation OpenParenLoc = Tok.getLocation();
+  SourceLocation CloseParenLoc;
+  if (Tok.getKind() != tok::l_paren) {
+    Diag(Tok, diag::err_expected_lparen_after, Spelling);
+    return ExprResult(true);
+  }
+
+  ExprResult Operand = ParseSimpleParenExpression(CloseParenLoc);
+  if (Operand.isInvalid)
+    return Operand;
+
+  // Everything parsed; let the actions build the cast.
+  return Actions.ParseCXXCasts(KeywordLoc, CastKind,
+                               LessLoc, TargetTy, GreaterLoc,
+                               OpenParenLoc, Operand.Val, CloseParenLoc);
+}
+
+/// ParseCXXBoolLiteral - This handles the C++ Boolean literals.
+/// +/// boolean-literal: [C++ 2.13.5] +/// 'true' +/// 'false' +Parser::ExprResult Parser::ParseCXXBoolLiteral() { + tok::TokenKind Kind = Tok.getKind(); + return Actions.ParseCXXBoolLiteral(ConsumeToken(), Kind); +} diff --git a/Parse/ParseInit.cpp b/Parse/ParseInit.cpp new file mode 100644 index 0000000000..01169d8859 --- /dev/null +++ b/Parse/ParseInit.cpp @@ -0,0 +1,188 @@ +//===--- ParseInit.cpp - Initializer Parsing ------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements initializer parsing as specified by C99 6.7.8. +// +//===----------------------------------------------------------------------===// + +#include "clang/Parse/Parser.h" +#include "clang/Basic/Diagnostic.h" +using namespace clang; + + +/// MayBeDesignationStart - Return true if this token might be the start of a +/// designator. +static bool MayBeDesignationStart(tok::TokenKind K) { + switch (K) { + default: return false; + case tok::period: // designator: '.' identifier + case tok::l_square: // designator: array-designator + case tok::identifier: // designation: identifier ':' + return true; + } +} + +/// ParseInitializerWithPotentialDesignator - Parse the 'initializer' production +/// checking to see if the token stream starts with a designator. +/// +/// designation: +/// designator-list '=' +/// [GNU] array-designator +/// [GNU] identifier ':' +/// +/// designator-list: +/// designator +/// designator-list designator +/// +/// designator: +/// array-designator +/// '.' identifier +/// +/// array-designator: +/// '[' constant-expression ']' +/// [GNU] '[' constant-expression '...' constant-expression ']' +/// +/// NOTE: [OBC] allows '[ objc-receiver objc-message-args ]' as an +/// initializer. 
We need to consider this case when parsing array designators. +/// +Parser::ExprResult Parser::ParseInitializerWithPotentialDesignator() { + // Parse each designator in the designator list until we find an initializer. + while (1) { + switch (Tok.getKind()) { + case tok::equal: + // We read some number (at least one due to the grammar we implemented) + // of designators and found an '=' sign. The following tokens must be + // the initializer. + ConsumeToken(); + return ParseInitializer(); + + default: { + // We read some number (at least one due to the grammar we implemented) + // of designators and found something that isn't an = or an initializer. + // If we have exactly one array designator [TODO CHECK], this is the GNU + // 'designation: array-designator' extension. Otherwise, it is a parse + // error. + SourceLocation Loc = Tok.getLocation(); + ExprResult Init = ParseInitializer(); + if (Init.isInvalid) return Init; + + Diag(Tok, diag::ext_gnu_missing_equal_designator); + return Init; + } + case tok::period: + // designator: '.' identifier + ConsumeToken(); + if (ExpectAndConsume(tok::identifier, diag::err_expected_ident)) + return ExprResult(true); + break; + + case tok::l_square: { + // array-designator: '[' constant-expression ']' + // array-designator: '[' constant-expression '...' constant-expression ']' + SourceLocation StartLoc = ConsumeBracket(); + + ExprResult Idx = ParseConstantExpression(); + if (Idx.isInvalid) { + SkipUntil(tok::r_square); + return Idx; + } + + // Handle the gnu array range extension. 
+ if (Tok.getKind() == tok::ellipsis) { + Diag(Tok, diag::ext_gnu_array_range); + ConsumeToken(); + + ExprResult RHS = ParseConstantExpression(); + if (RHS.isInvalid) { + SkipUntil(tok::r_square); + return RHS; + } + } + + MatchRHSPunctuation(tok::r_square, StartLoc); + break; + } + case tok::identifier: { + // Due to the GNU "designation: identifier ':'" extension, we don't know + // whether something starting with an identifier is an + // assignment-expression or if it is an old-style structure field + // designator. + // TODO: Check that this is the first designator. + LexerToken Ident = Tok; + ConsumeToken(); + + // If this is the gross GNU extension, handle it now. + if (Tok.getKind() == tok::colon) { + Diag(Ident, diag::ext_gnu_old_style_field_designator); + ConsumeToken(); + return ParseInitializer(); + } + + // Otherwise, we just consumed the first token of an expression. Parse + // the rest of it now. + return ParseAssignmentExprWithLeadingIdentifier(Ident); + } + } + } +} + + +/// ParseInitializer +/// initializer: [C99 6.7.8] +/// assignment-expression +/// '{' initializer-list '}' +/// '{' initializer-list ',' '}' +/// [GNU] '{' '}' +/// +/// initializer-list: +/// designation[opt] initializer +/// initializer-list ',' designation[opt] initializer +/// +Parser::ExprResult Parser::ParseInitializer() { + if (Tok.getKind() != tok::l_brace) + return ParseAssignmentExpression(); + + SourceLocation LBraceLoc = ConsumeBrace(); + + // We support empty initializers, but tell the user that they aren't using + // C99-clean code. + if (Tok.getKind() == tok::r_brace) + Diag(LBraceLoc, diag::ext_gnu_empty_initializer); + else { + while (1) { + // Parse: designation[opt] initializer + + // If we know that this cannot be a designation, just parse the nested + // initializer directly. 
+ ExprResult SubElt; + if (!MayBeDesignationStart(Tok.getKind())) + SubElt = ParseInitializer(); + else + SubElt = ParseInitializerWithPotentialDesignator(); + + // If we couldn't parse the subelement, bail out. + if (SubElt.isInvalid) { + SkipUntil(tok::r_brace); + return SubElt; + } + + // If we don't have a comma continued list, we're done. + if (Tok.getKind() != tok::comma) break; + ConsumeToken(); + + // Handle trailing comma. + if (Tok.getKind() == tok::r_brace) break; + } + } + + // Match the '}'. + MatchRHSPunctuation(tok::r_brace, LBraceLoc); + return ExprResult(false); +} + diff --git a/Parse/ParseObjc.cpp b/Parse/ParseObjc.cpp new file mode 100644 index 0000000000..00b606ac1a --- /dev/null +++ b/Parse/ParseObjc.cpp @@ -0,0 +1,105 @@ +//===--- ParseObjc.cpp - Objective C Parsing ------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Steve Naroff and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Objective-C portions of the Parser interface. +// +//===----------------------------------------------------------------------===// + +#include "clang/Parse/Parser.h" +#include "clang/Basic/Diagnostic.h" +#include "llvm/ADT/SmallVector.h" +using namespace clang; + + +/// ParseExternalDeclaration: +/// external-declaration: [C99 6.9] +/// [OBJC] objc-class-definition +/// [OBJC] objc-class-declaration [TODO] +/// [OBJC] objc-alias-declaration [TODO] +/// [OBJC] objc-protocol-definition [TODO] +/// [OBJC] objc-method-definition [TODO] +/// [OBJC] '@' 'end' [TODO] +void Parser::ParseObjCAtDirectives() { + SourceLocation AtLoc = ConsumeToken(); // the "@" + + IdentifierInfo *II = Tok.getIdentifierInfo(); + switch (II ? 
II->getObjCKeywordID() : tok::objc_not_keyword) { + case tok::objc_class: + return ParseObjCAtClassDeclaration(AtLoc); + case tok::objc_interface: + return ParseObjCAtInterfaceDeclaration(); + case tok::objc_protocol: + return ParseObjCAtProtocolDeclaration(); + case tok::objc_implementation: + return ParseObjCAtImplementationDeclaration(); + case tok::objc_end: + return ParseObjCAtEndDeclaration(); + case tok::objc_compatibility_alias: + return ParseObjCAtAliasDeclaration(); + default: + Diag(AtLoc, diag::err_unexpected_at); + SkipUntil(tok::semi); + } +} + +/// +/// objc-class-declaration: +/// '@' 'class' identifier-list ';' +/// +void Parser::ParseObjCAtClassDeclaration(SourceLocation atLoc) { + ConsumeToken(); // the identifier "class" + llvm::SmallVector<IdentifierInfo *, 8> ClassNames; + + while (1) { + if (Tok.getKind() != tok::identifier) { + Diag(Tok, diag::err_expected_ident); + SkipUntil(tok::semi); + return; + } + + ClassNames.push_back(Tok.getIdentifierInfo()); + ConsumeToken(); + + if (Tok.getKind() != tok::comma) + break; + + ConsumeToken(); + } + + // Consume the ';'. 
+ if (ExpectAndConsume(tok::semi, diag::err_expected_semi_after, "@class")) + return; + + Actions.ParsedObjcClassDeclaration(CurScope, + &ClassNames[0], ClassNames.size()); +} + +void Parser::ParseObjCAtInterfaceDeclaration() { + assert(0 && "Unimp"); +} +void Parser::ParseObjCAtProtocolDeclaration() { + assert(0 && "Unimp"); +} +void Parser::ParseObjCAtImplementationDeclaration() { + assert(0 && "Unimp"); +} +void Parser::ParseObjCAtEndDeclaration() { + assert(0 && "Unimp"); +} +void Parser::ParseObjCAtAliasDeclaration() { + assert(0 && "Unimp"); +} + +void Parser::ParseObjCInstanceMethodDeclaration() { + assert(0 && "Unimp"); +} + +void Parser::ParseObjCClassMethodDeclaration() { + assert(0 && "Unimp"); +} diff --git a/Parse/ParseStmt.cpp b/Parse/ParseStmt.cpp new file mode 100644 index 0000000000..7bc7a9c9a7 --- /dev/null +++ b/Parse/ParseStmt.cpp @@ -0,0 +1,848 @@ +//===--- ParseStmt.cpp - Statement and Block Parser -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Statement and Block portions of the Parser +// interface. +// +//===----------------------------------------------------------------------===// + +#include "clang/Parse/Parser.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Parse/DeclSpec.h" +#include "clang/Parse/Scope.h" +using namespace clang; + +//===----------------------------------------------------------------------===// +// C99 6.8: Statements and Blocks. +//===----------------------------------------------------------------------===// + +/// ParseStatementOrDeclaration - Read 'statement' or 'declaration'. 
+///       StatementOrDeclaration:
+///         statement
+///         declaration
+///
+///       statement:
+///         labeled-statement
+///         compound-statement
+///         expression-statement
+///         selection-statement
+///         iteration-statement
+///         jump-statement
+/// [OBC]   objc-throw-statement         [TODO]
+/// [OBC]   objc-try-catch-statement     [TODO]
+/// [OBC]   objc-synchronized-statement  [TODO]
+/// [GNU]   asm-statement
+/// [OMP]   openmp-construct             [TODO]
+///
+///       labeled-statement:
+///         identifier ':' statement
+///         'case' constant-expression ':' statement
+///         'default' ':' statement
+///
+///       selection-statement:
+///         if-statement
+///         switch-statement
+///
+///       iteration-statement:
+///         while-statement
+///         do-statement
+///         for-statement
+///
+///       expression-statement:
+///         expression[opt] ';'
+///
+///       jump-statement:
+///         'goto' identifier ';'
+///         'continue' ';'
+///         'break' ';'
+///         'return' expression[opt] ';'
+/// [GNU]   'goto' '*' expression ';'
+///
+/// [OBC] objc-throw-statement:           [TODO]
+/// [OBC]   '@' 'throw' expression ';'    [TODO]
+/// [OBC]   '@' 'throw' ';'               [TODO]
+///
+/// If OnlyStatement is true, declarations are not accepted here.
+Parser::StmtResult Parser::ParseStatementOrDeclaration(bool OnlyStatement) {
+  const char *SemiError = 0;
+  Parser::StmtResult Res;
+
+  // Cases in this switch statement should 'break' out of the switch if the
+  // parser expects the statement to end in a semicolon (in which case
+  // SemiError should be set), or they directly 'return' if not.
+  switch (Tok.getKind()) {
+  case tok::identifier:             // C99 6.8.1: labeled-statement
+    // identifier ':' statement
+    // declaration                  (if !OnlyStatement)
+    // expression[opt] ';'
+    return ParseIdentifierStatement(OnlyStatement);
+
+  default:
+    if (!OnlyStatement && isDeclarationSpecifier()) {
+      // TODO: warn/disable if declaration is in the middle of a block and !C99.
+      return Actions.ParseDeclStmt(ParseDeclaration(Declarator::BlockContext));
+    } else if (Tok.getKind() == tok::r_brace) {
+      Diag(Tok, diag::err_expected_statement);
+      return true;
+    } else {
+      // expression[opt] ';'
+      ExprResult Expr = ParseExpression();
+      if (Expr.isInvalid) {
+        // If the expression is invalid, skip ahead to the next semicolon.  Not
+        // doing this opens us up to the possibility of infinite loops if
+        // ParseExpression does not consume any tokens.
+        SkipUntil(tok::semi);
+        return true;
+      }
+      // Otherwise, eat the semicolon.
+      ExpectAndConsume(tok::semi, diag::err_expected_semi_after_expr);
+      return Actions.ParseExprStmt(Expr.Val);
+    }
+
+  case tok::kw_case:                // C99 6.8.1: labeled-statement
+    return ParseCaseStatement();
+  case tok::kw_default:             // C99 6.8.1: labeled-statement
+    return ParseDefaultStatement();
+
+  case tok::l_brace:                // C99 6.8.2: compound-statement
+    return ParseCompoundStatement();
+  case tok::semi:                   // C99 6.8.3p3: expression[opt] ';'
+    return Actions.ParseNullStmt(ConsumeToken());
+
+  case tok::kw_if:                  // C99 6.8.4.1: if-statement
+    return ParseIfStatement();
+  case tok::kw_switch:              // C99 6.8.4.2: switch-statement
+    return ParseSwitchStatement();
+
+  case tok::kw_while:               // C99 6.8.5.1: while-statement
+    return ParseWhileStatement();
+  case tok::kw_do:                  // C99 6.8.5.2: do-statement
+    Res = ParseDoStatement();
+    SemiError = "do/while loop";
+    break;
+  case tok::kw_for:                 // C99 6.8.5.3: for-statement
+    return ParseForStatement();
+
+  case tok::kw_goto:                // C99 6.8.6.1: goto-statement
+    Res = ParseGotoStatement();
+    SemiError = "goto statement";
+    break;
+  case tok::kw_continue:            // C99 6.8.6.2: continue-statement
+    Res = ParseContinueStatement();
+    SemiError = "continue statement";
+    break;
+  case tok::kw_break:               // C99 6.8.6.3: break-statement
+    Res = ParseBreakStatement();
+    SemiError = "break statement";
+    break;
+  case tok::kw_return:              // C99 6.8.6.4: return-statement
+    Res = ParseReturnStatement();
+    SemiError = "return statement";
+    break;
+
+  case tok::kw_asm:
+    Res = ParseAsmStatement();
+    SemiError = "asm statement";
+    break;
+  }
+
+  // If we reached this code, the statement must end in a semicolon.
+  if (Tok.getKind() == tok::semi) {
+    ConsumeToken();
+  } else {
+    Diag(Tok, diag::err_expected_semi_after, SemiError);
+    SkipUntil(tok::semi);
+  }
+  return Res;
+}
+
+/// ParseIdentifierStatement - Because we don't have two-token lookahead, we
+/// have a bit of a quandary here.  Reading the identifier is necessary to see
+/// if there is a ':' after it.  If there is, this is a label, regardless of
+/// what else the identifier can mean.  If not, this is either part of a
+/// declaration (if the identifier is a type-name) or part of an expression.
+///
+///       labeled-statement:
+///         identifier ':' statement
+/// [GNU]   identifier ':' attributes[opt] statement
+///         declaration                  (if !OnlyStatement)
+///         expression[opt] ';'
+///
+Parser::StmtResult Parser::ParseIdentifierStatement(bool OnlyStatement) {
+  assert(Tok.getKind() == tok::identifier && Tok.getIdentifierInfo() &&
+         "Not an identifier!");
+
+  LexerToken IdentTok = Tok;  // Save the whole token.
+  ConsumeToken();  // eat the identifier.
+
+  // identifier ':' statement
+  if (Tok.getKind() == tok::colon) {
+    SourceLocation ColonLoc = ConsumeToken();
+
+    // Read label attributes, if present.  They are parsed but not attached to
+    // anything yet.
+    DeclTy *AttrList = 0;
+    if (Tok.getKind() == tok::kw___attribute)
+      // TODO: save these somewhere.
+      AttrList = ParseAttributes();
+
+    StmtResult SubStmt = ParseStatement();
+
+    // Broken substmt shouldn't prevent the label from being added to the AST.
+    if (SubStmt.isInvalid)
+      SubStmt = Actions.ParseNullStmt(ColonLoc);
+
+    return Actions.ParseLabelStmt(IdentTok.getLocation(),
+                                  IdentTok.getIdentifierInfo(),
+                                  ColonLoc, SubStmt.Val);
+  }
+
+  // Check to see if this is a declaration.
+  void *TypeRep;
+  if (!OnlyStatement &&
+      (TypeRep = Actions.isTypeName(*IdentTok.getIdentifierInfo(), CurScope))) {
+    // Handle this.  Warn/disable if in middle of block and !C99.
+    DeclSpec DS;
+
+    // Add the typedef name to the start of the decl-specs.
+    const char *PrevSpec = 0;
+    int isInvalid = DS.SetTypeSpecType(DeclSpec::TST_typedef,
+                                       IdentTok.getLocation(), PrevSpec,
+                                       TypeRep);
+    assert(!isInvalid && "First declspec can't be invalid!");
+
+    // ParseDeclarationSpecifiers will continue from there.
+    ParseDeclarationSpecifiers(DS);
+
+    // C99 6.7.2.3p6: Handle "struct-or-union identifier;", "enum { X };"
+    // declaration-specifiers init-declarator-list[opt] ';'
+    if (Tok.getKind() == tok::semi) {
+      // TODO: emit error on 'int;' or 'const enum foo;'.
+      // if (!DS.isMissingDeclaratorOk()) Diag(...);
+
+      ConsumeToken();
+      // FIXME: Return this as a type decl.
+      return 0;
+    }
+
+    // Parse all the declarators.
+    Declarator DeclaratorInfo(DS, Declarator::BlockContext);
+    ParseDeclarator(DeclaratorInfo);
+
+    DeclTy *Decl = ParseInitDeclaratorListAfterFirstDeclarator(DeclaratorInfo);
+    return Decl ? Actions.ParseDeclStmt(Decl) : 0;
+  }
+
+  // Otherwise, this is an expression.  Seed it with II and parse it.
+  ExprResult Res = ParseExpressionWithLeadingIdentifier(IdentTok);
+  if (Res.isInvalid) {
+    SkipUntil(tok::semi);
+    return true;
+  } else if (Tok.getKind() != tok::semi) {
+    Diag(Tok, diag::err_expected_semi_after, "expression");
+    SkipUntil(tok::semi);
+    return true;
+  } else {
+    ConsumeToken();
+    // Convert expr to a stmt.
+    return Actions.ParseExprStmt(Res.Val);
+  }
+}
+
+/// ParseCaseStatement
+///       labeled-statement:
+///         'case' constant-expression ':' statement
+/// [GNU]   'case' constant-expression '...' constant-expression ':' statement
+///
+/// The trailing 'statement' is parsed here too; if it is broken, a null
+/// statement located at the ':' is substituted so the case label still
+/// reaches the actions.
+///
+Parser::StmtResult Parser::ParseCaseStatement() {
+  assert(Tok.getKind() == tok::kw_case && "Not a case stmt!");
+  SourceLocation CaseLoc = ConsumeToken();  // eat the 'case'.
+
+  ExprResult LHS = ParseConstantExpression();
+  if (LHS.isInvalid) {
+    SkipUntil(tok::colon);
+    return true;
+  }
+
+  // GNU case range extension.
+  SourceLocation DotDotDotLoc;
+  ExprTy *RHSVal = 0;
+  if (Tok.getKind() == tok::ellipsis) {
+    Diag(Tok, diag::ext_gnu_case_range);
+    DotDotDotLoc = ConsumeToken();
+
+    ExprResult RHS = ParseConstantExpression();
+    if (RHS.isInvalid) {
+      SkipUntil(tok::colon);
+      return true;
+    }
+    RHSVal = RHS.Val;
+  }
+
+  if (Tok.getKind() != tok::colon) {
+    Diag(Tok, diag::err_expected_colon_after, "'case'");
+    SkipUntil(tok::colon);
+    return true;
+  }
+
+  SourceLocation ColonLoc = ConsumeToken();
+
+  // Diagnose the common error "switch (X) { case 4: }", which is not valid.
+  if (Tok.getKind() == tok::r_brace) {
+    Diag(Tok, diag::err_label_end_of_compound_statement);
+    return true;
+  }
+
+  StmtResult SubStmt = ParseStatement();
+
+  // Broken substmt shouldn't prevent the case from being added to the AST.
+  if (SubStmt.isInvalid)
+    SubStmt = Actions.ParseNullStmt(ColonLoc);
+
+  // TODO: look up enclosing switch stmt.
+  return Actions.ParseCaseStmt(CaseLoc, LHS.Val, DotDotDotLoc, RHSVal, ColonLoc,
+                               SubStmt.Val);
+}
+
+/// ParseDefaultStatement
+///       labeled-statement:
+///         'default' ':' statement
+///
+/// The trailing 'statement' is parsed here too; if it is broken, a null
+/// statement located at the ':' is substituted so the default label still
+/// reaches the actions.
+///
+Parser::StmtResult Parser::ParseDefaultStatement() {
+  assert(Tok.getKind() == tok::kw_default && "Not a default stmt!");
+  SourceLocation DefaultLoc = ConsumeToken();  // eat the 'default'.
+
+  if (Tok.getKind() != tok::colon) {
+    Diag(Tok, diag::err_expected_colon_after, "'default'");
+    SkipUntil(tok::colon);
+    return true;
+  }
+
+  SourceLocation ColonLoc = ConsumeToken();
+
+  // Diagnose the common error "switch (X) {... default: }", which is not valid.
+  if (Tok.getKind() == tok::r_brace) {
+    Diag(Tok, diag::err_label_end_of_compound_statement);
+    return true;
+  }
+
+  StmtResult SubStmt = ParseStatement();
+
+  // Broken substmt shouldn't prevent the default label from being added to the
+  // AST; recover the same way ParseCaseStatement and ParseIdentifierStatement
+  // do instead of dropping the whole labeled statement.
+  if (SubStmt.isInvalid)
+    SubStmt = Actions.ParseNullStmt(ColonLoc);
+
+  // TODO: look up enclosing switch stmt.
+  return Actions.ParseDefaultStmt(DefaultLoc, ColonLoc, SubStmt.Val);
+}
+
+
+/// ParseCompoundStatement - Parse a "{}" block.
+///
+///       compound-statement: [C99 6.8.2]
+///         { block-item-list[opt] }
+/// [GNU]   { label-declarations block-item-list } [TODO]
+///
+///       block-item-list:
+///         block-item
+///         block-item-list block-item
+///
+///       block-item:
+///         declaration
+/// [GNU]   '__extension__' declaration [TODO]
+///         statement
+/// [OMP]   openmp-directive            [TODO]
+///
+/// [GNU] label-declarations:
+/// [GNU]   label-declaration
+/// [GNU]   label-declarations label-declaration
+///
+/// [GNU] label-declaration:
+/// [GNU]   '__label__' identifier-list ';'
+///
+/// [OMP] openmp-directive:             [TODO]
+/// [OMP]   barrier-directive
+/// [OMP]   flush-directive
+///
+Parser::StmtResult Parser::ParseCompoundStatement() {
+  assert(Tok.getKind() == tok::l_brace && "Not a compount stmt!");
+
+  // Enter a scope to hold everything within the compound stmt.
+  EnterScope(0);
+
+  // Parse the statements in the body.
+  StmtResult Body = ParseCompoundStatementBody();
+
+  ExitScope();
+  return Body;
+}
+
+
+/// ParseCompoundStatementBody - Parse a sequence of statements and invoke the
+/// ParseCompoundStmt action.  This expects the '{' to be the current token, and
+/// consume the '}' at the end of the block.  It does not manipulate the scope
+/// stack.
+Parser::StmtResult Parser::ParseCompoundStatementBody() {
+  SourceLocation LBraceLoc = ConsumeBrace();  // eat the '{'.
+
+  // TODO: "__label__ X, Y, Z;" is the GNU "Local Label" extension.  These are
+  // only allowed at the start of a compound stmt.
+
+  // Invalid and null results are simply left out of the statement list.
+  llvm::SmallVector<StmtTy*, 32> Stmts;
+  while (Tok.getKind() != tok::r_brace && Tok.getKind() != tok::eof) {
+    StmtResult R = ParseStatementOrDeclaration(false);
+    if (!R.isInvalid && R.Val)
+      Stmts.push_back(R.Val);
+  }
+
+  // We broke out of the while loop because we found a '}' or EOF.
+  if (Tok.getKind() != tok::r_brace) {
+    Diag(Tok, diag::err_expected_rbrace);
+    return 0;
+  }
+
+  SourceLocation RBraceLoc = ConsumeBrace();
+  // An empty block ("{}") leaves Stmts empty; don't index element 0 of an
+  // empty vector just to form a pointer.
+  return Actions.ParseCompoundStmt(LBraceLoc, RBraceLoc,
+                                   Stmts.empty() ? 0 : &Stmts[0],
+                                   Stmts.size());
+}
+
+/// ParseIfStatement
+///       if-statement: [C99 6.8.4.1]
+///         'if' '(' expression ')' statement
+///         'if' '(' expression ')' statement 'else' statement
+///
+Parser::StmtResult Parser::ParseIfStatement() {
+  assert(Tok.getKind() == tok::kw_if && "Not an if stmt!");
+  SourceLocation IfLoc = ConsumeToken();  // eat the 'if'.
+
+  if (Tok.getKind() != tok::l_paren) {
+    Diag(Tok, diag::err_expected_lparen_after, "if");
+    SkipUntil(tok::semi);
+    return true;
+  }
+
+  // Parse the condition.
+  ExprResult CondExp = ParseSimpleParenExpression();
+  if (CondExp.isInvalid) {
+    SkipUntil(tok::semi);
+    return true;
+  }
+
+  // Read the 'then' statement.
+  StmtResult ThenStmt = ParseStatement();
+
+  // Broken substmt shouldn't prevent the if from being added to the AST.
+  if (ThenStmt.isInvalid)
+    ThenStmt = Actions.ParseNullStmt(Tok.getLocation());
+
+
+  // If it has an else, parse it.
+  SourceLocation ElseLoc;
+  StmtResult ElseStmt(false);
+  if (Tok.getKind() == tok::kw_else) {
+    ElseLoc = ConsumeToken();
+    ElseStmt = ParseStatement();
+
+    // Same recovery for a broken 'else' branch.
+    if (ElseStmt.isInvalid)
+      ElseStmt = Actions.ParseNullStmt(ElseLoc);
+  }
+
+  return Actions.ParseIfStmt(IfLoc, CondExp.Val, ThenStmt.Val,
+                             ElseLoc, ElseStmt.Val);
+}
+
+/// ParseSwitchStatement
+///       switch-statement:
+///         'switch' '(' expression ')' statement
+Parser::StmtResult Parser::ParseSwitchStatement() {
+  assert(Tok.getKind() == tok::kw_switch && "Not a switch stmt!");
+  SourceLocation SwitchLoc = ConsumeToken();  // eat the 'switch'.
+
+  if (Tok.getKind() != tok::l_paren) {
+    Diag(Tok, diag::err_expected_lparen_after, "switch");
+    SkipUntil(tok::semi);
+    return true;
+  }
+
+  // Start the switch scope.
+  EnterScope(Scope::BreakScope);
+
+  // Parse the condition.
+  ExprResult Cond = ParseSimpleParenExpression();
+
+  // Read the body statement.  This is done even if the condition was invalid,
+  // so the body's tokens are still consumed.
+  StmtResult Body = ParseStatement();
+
+  ExitScope();
+
+  if (Cond.isInvalid || Body.isInvalid) return true;
+
+  return Actions.ParseSwitchStmt(SwitchLoc, Cond.Val, Body.Val);
+}
+
+/// ParseWhileStatement
+///       while-statement: [C99 6.8.5.1]
+///         'while' '(' expression ')' statement
+Parser::StmtResult Parser::ParseWhileStatement() {
+  assert(Tok.getKind() == tok::kw_while && "Not a while stmt!");
+  SourceLocation WhileLoc = Tok.getLocation();
+  ConsumeToken();  // eat the 'while'.
+
+  if (Tok.getKind() != tok::l_paren) {
+    Diag(Tok, diag::err_expected_lparen_after, "while");
+    SkipUntil(tok::semi);
+    return true;
+  }
+
+  // Start the loop scope.
+  EnterScope(Scope::BreakScope | Scope::ContinueScope);
+
+  // Parse the condition.
+  ExprResult Cond = ParseSimpleParenExpression();
+
+  // Read the body statement.  This is done even if the condition was invalid,
+  // so the body's tokens are still consumed.
+  StmtResult Body = ParseStatement();
+
+  ExitScope();
+
+  if (Cond.isInvalid || Body.isInvalid) return true;
+
+  return Actions.ParseWhileStmt(WhileLoc, Cond.Val, Body.Val);
+}
+
+/// ParseDoStatement
+///       do-statement: [C99 6.8.5.2]
+///         'do' statement 'while' '(' expression ')' ';'
+/// Note: this lets the caller parse the end ';'.
+Parser::StmtResult Parser::ParseDoStatement() {
+  assert(Tok.getKind() == tok::kw_do && "Not a do stmt!");
+  SourceLocation DoLoc = ConsumeToken();  // eat the 'do'.
+
+  // Start the loop scope.
+  EnterScope(Scope::BreakScope | Scope::ContinueScope);
+
+  // Read the body statement.
+  StmtResult Body = ParseStatement();
+
+  if (Tok.getKind() != tok::kw_while) {
+    ExitScope();
+    Diag(Tok, diag::err_expected_while);
+    Diag(DoLoc, diag::err_matching, "do");
+    // Skip to the semicolon, but don't consume it: the caller eats the
+    // trailing ';' and would otherwise report a second, spurious error and
+    // skip over the following statement.
+    SkipUntil(tok::semi, false, true);
+    return true;
+  }
+  SourceLocation WhileLoc = ConsumeToken();
+
+  if (Tok.getKind() != tok::l_paren) {
+    ExitScope();
+    Diag(Tok, diag::err_expected_lparen_after, "do/while");
+    // As above: leave the ';' for the caller.
+    SkipUntil(tok::semi, false, true);
+    return true;
+  }
+
+  // Parse the condition.
+  ExprResult Cond = ParseSimpleParenExpression();
+
+  ExitScope();
+
+  if (Cond.isInvalid || Body.isInvalid) return true;
+
+  return Actions.ParseDoStmt(DoLoc, Body.Val, WhileLoc, Cond.Val);
+}
+
+/// ParseForStatement
+///       for-statement: [C99 6.8.5.3]
+///         'for' '(' expr[opt] ';' expr[opt] ';' expr[opt] ')' statement
+///         'for' '(' declaration expr[opt] ';' expr[opt] ')' statement
+///
+/// Each of the three header parts that fails to parse is passed to the
+/// ParseForStmt action as null; only an invalid body makes the whole
+/// statement invalid.
+Parser::StmtResult Parser::ParseForStatement() {
+  assert(Tok.getKind() == tok::kw_for && "Not a for stmt!");
+  SourceLocation ForLoc = ConsumeToken();  // eat the 'for'.
+
+  if (Tok.getKind() != tok::l_paren) {
+    Diag(Tok, diag::err_expected_lparen_after, "for");
+    SkipUntil(tok::semi);
+    return true;
+  }
+
+  EnterScope(Scope::BreakScope | Scope::ContinueScope);
+
+  SourceLocation LParenLoc = ConsumeParen();
+  ExprResult Value;
+
+  StmtTy *FirstPart = 0;
+  ExprTy *SecondPart = 0;
+  StmtTy *ThirdPart = 0;
+
+  // Parse the first part of the for specifier.
+  if (Tok.getKind() == tok::semi) {  // for (;
+    // no first part, eat the ';'.
+    ConsumeToken();
+  } else if (isDeclarationSpecifier()) {  // for (int X = 4;
+    // Parse declaration, which eats the ';'.
+    if (!getLang().C99)   // Use of C99-style for loops in C90 mode?
+      Diag(Tok, diag::ext_c99_variable_decl_in_for_loop);
+    DeclTy *aBlockVarDecl = ParseDeclaration(Declarator::ForContext);
+    StmtResult stmtResult = Actions.ParseDeclStmt(aBlockVarDecl);
+    FirstPart = stmtResult.isInvalid ? 0 : stmtResult.Val;
+  } else {
+    Value = ParseExpression();
+
+    // Turn the expression into a stmt.
+    if (!Value.isInvalid) {
+      StmtResult R = Actions.ParseExprStmt(Value.Val);
+      if (!R.isInvalid)
+        FirstPart = R.Val;
+    }
+
+    if (Tok.getKind() == tok::semi) {
+      ConsumeToken();
+    } else {
+      // Don't pile a "missing ';'" error on top of an expression error.
+      if (!Value.isInvalid) Diag(Tok, diag::err_expected_semi_for);
+      SkipUntil(tok::semi);
+    }
+  }
+
+  // Parse the second part of the for specifier.
+  if (Tok.getKind() == tok::semi) {  // for (...;;
+    // no second part.
+    Value = ExprResult();
+  } else {
+    Value = ParseExpression();
+    if (!Value.isInvalid)
+      SecondPart = Value.Val;
+  }
+
+  if (Tok.getKind() == tok::semi) {
+    ConsumeToken();
+  } else {
+    if (!Value.isInvalid) Diag(Tok, diag::err_expected_semi_for);
+    SkipUntil(tok::semi);
+  }
+
+  // Parse the third part of the for specifier.
+  if (Tok.getKind() == tok::r_paren) {  // for (...;...;)
+    // no third part.
+    Value = ExprResult();
+  } else {
+    Value = ParseExpression();
+    if (!Value.isInvalid) {
+      // Turn the expression into a stmt.
+      StmtResult R = Actions.ParseExprStmt(Value.Val);
+      if (!R.isInvalid)
+        ThirdPart = R.Val;
+    }
+  }
+
+  // Match the ')'.
+  SourceLocation RParenLoc = MatchRHSPunctuation(tok::r_paren, LParenLoc);
+
+  // Read the body statement.
+  StmtResult Body = ParseStatement();
+
+  // Leave the for-scope.
+  ExitScope();
+
+  if (Body.isInvalid)
+    return Body;
+
+  return Actions.ParseForStmt(ForLoc, LParenLoc, FirstPart, SecondPart,
+                              ThirdPart, RParenLoc, Body.Val);
+}
+
+/// ParseGotoStatement
+///       jump-statement:
+///         'goto' identifier ';'
+/// [GNU]   'goto' '*' expression ';'
+///
+/// Note: this lets the caller parse the end ';'.
+///
+Parser::StmtResult Parser::ParseGotoStatement() {
+  assert(Tok.getKind() == tok::kw_goto && "Not a goto stmt!");
+  SourceLocation GotoLoc = ConsumeToken();  // eat the 'goto'.
+
+  StmtResult Res;
+  if (Tok.getKind() == tok::identifier) {
+    Res = Actions.ParseGotoStmt(GotoLoc, Tok.getLocation(),
+                                Tok.getIdentifierInfo());
+    ConsumeToken();
+  } else if (Tok.getKind() == tok::star && !getLang().NoExtensions) {
+    // GNU indirect goto extension.
+    Diag(Tok, diag::ext_gnu_indirect_goto);
+    SourceLocation StarLoc = ConsumeToken();
+    ExprResult R = ParseExpression();
+    if (R.isInvalid) {  // Skip to the semicolon, but don't consume it.
+      SkipUntil(tok::semi, false, true);
+      return true;
+    }
+    Res = Actions.ParseIndirectGotoStmt(GotoLoc, StarLoc, R.Val);
+  } else {
+    // Neither a label name nor a permitted '*': diagnose instead of silently
+    // returning a default-constructed result, then skip to the semicolon
+    // without consuming it so the caller can eat it.
+    Diag(Tok, diag::err_expected_ident);
+    SkipUntil(tok::semi, false, true);
+    return true;
+  }
+  return Res;
+}
+
+/// ParseContinueStatement
+///       jump-statement:
+///         'continue' ';'
+///
+/// Note: this lets the caller parse the end ';'.
+///
+Parser::StmtResult Parser::ParseContinueStatement() {
+  SourceLocation ContinueLoc = ConsumeToken();  // eat the 'continue'.
+  return Actions.ParseContinueStmt(ContinueLoc, CurScope);
+}
+
+/// ParseBreakStatement
+///       jump-statement:
+///         'break' ';'
+///
+/// Note: this lets the caller parse the end ';'.
+///
+Parser::StmtResult Parser::ParseBreakStatement() {
+  SourceLocation BreakLoc = ConsumeToken();  // eat the 'break'.
+  return Actions.ParseBreakStmt(BreakLoc, CurScope);
+}
+
+/// ParseReturnStatement
+///       jump-statement:
+///         'return' expression[opt] ';'
+///
+/// Note: this lets the caller parse the end ';'.  A bare 'return' passes the
+/// Val of ExprResult(0) to the action.
+Parser::StmtResult Parser::ParseReturnStatement() {
+  assert(Tok.getKind() == tok::kw_return && "Not a return stmt!");
+  SourceLocation ReturnLoc = ConsumeToken();  // eat the 'return'.
+
+  ExprResult R(0);
+  if (Tok.getKind() != tok::semi) {
+    R = ParseExpression();
+    if (R.isInvalid) {  // Skip to the semicolon, but don't consume it.
+      SkipUntil(tok::semi, false, true);
+      return true;
+    }
+  }
+  return Actions.ParseReturnStmt(ReturnLoc, R.Val);
+}
+
+/// ParseAsmStatement - Parse a GNU extended asm statement.
+/// [GNU] asm-statement: +/// 'asm' type-qualifier[opt] '(' asm-argument ')' ';' +/// +/// [GNU] asm-argument: +/// asm-string-literal +/// asm-string-literal ':' asm-operands[opt] +/// asm-string-literal ':' asm-operands[opt] ':' asm-operands[opt] +/// asm-string-literal ':' asm-operands[opt] ':' asm-operands[opt] +/// ':' asm-clobbers +/// +/// [GNU] asm-clobbers: +/// asm-string-literal +/// asm-clobbers ',' asm-string-literal +/// +Parser::StmtResult Parser::ParseAsmStatement() { + assert(Tok.getKind() == tok::kw_asm && "Not an asm stmt"); + ConsumeToken(); + + DeclSpec DS; + SourceLocation Loc = Tok.getLocation(); + ParseTypeQualifierListOpt(DS); + + // GNU asms accept, but warn, about type-qualifiers other than volatile. + if (DS.getTypeQualifiers() & DeclSpec::TQ_const) + Diag(Loc, diag::w_asm_qualifier_ignored, "const"); + if (DS.getTypeQualifiers() & DeclSpec::TQ_restrict) + Diag(Loc, diag::w_asm_qualifier_ignored, "restrict"); + + // Remember if this was a volatile asm. + //bool isVolatile = DS.TypeQualifiers & DeclSpec::TQ_volatile; + + if (Tok.getKind() != tok::l_paren) { + Diag(Tok, diag::err_expected_lparen_after, "asm"); + SkipUntil(tok::r_paren); + return true; + } + Loc = ConsumeParen(); + + ParseAsmStringLiteral(); + + // Parse Outputs, if present. + ParseAsmOperandsOpt(); + + // Parse Inputs, if present. + ParseAsmOperandsOpt(); + + // Parse the clobbers, if present. + if (Tok.getKind() == tok::colon) { + ConsumeToken(); + + if (isTokenStringLiteral()) { + // Parse the asm-string list for clobbers. + while (1) { + ParseAsmStringLiteral(); + + if (Tok.getKind() != tok::comma) break; + ConsumeToken(); + } + } + } + + MatchRHSPunctuation(tok::r_paren, Loc); + + // FIXME: Implement action for asm parsing. + return false; +} + +/// ParseAsmOperands - Parse the asm-operands production as used by +/// asm-statement. We also parse a leading ':' token. If the leading colon is +/// not present, we do not parse anything. 
+/// +/// [GNU] asm-operands: +/// asm-operand +/// asm-operands ',' asm-operand +/// +/// [GNU] asm-operand: +/// asm-string-literal '(' expression ')' +/// '[' identifier ']' asm-string-literal '(' expression ')' +/// +void Parser::ParseAsmOperandsOpt() { + // Only do anything if this operand is present. + if (Tok.getKind() != tok::colon) return; + ConsumeToken(); + + // 'asm-operands' isn't present? + if (!isTokenStringLiteral() && Tok.getKind() != tok::l_square) + return; + + while (1) { + // Read the [id] if present. + if (Tok.getKind() == tok::l_square) { + SourceLocation Loc = ConsumeBracket(); + + if (Tok.getKind() != tok::identifier) { + Diag(Tok, diag::err_expected_ident); + SkipUntil(tok::r_paren); + return; + } + // The symbolic operand name is not recorded yet, but it must still be + // consumed: otherwise the identifier is the current token when we look + // for the ']' and every '[name]' operand is diagnosed as unmatched. + ConsumeToken(); + MatchRHSPunctuation(tok::r_square, Loc); + } + + ParseAsmStringLiteral(); + + if (Tok.getKind() != tok::l_paren) { + Diag(Tok, diag::err_expected_lparen_after, "asm operand"); + SkipUntil(tok::r_paren); + return; + } + + // Read the parenthesized expression. + ExprResult Res = ParseSimpleParenExpression(); + if (Res.isInvalid) { + SkipUntil(tok::r_paren); + return; + } + + // Eat the comma and continue parsing if it exists. + if (Tok.getKind() != tok::comma) return; + ConsumeToken(); + } +} diff --git a/Parse/Parser.cpp b/Parse/Parser.cpp new file mode 100644 index 0000000000..251febbcd6 --- /dev/null +++ b/Parse/Parser.cpp @@ -0,0 +1,615 @@ +//===--- Parser.cpp - C Language Family Parser ----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Parser interfaces. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/Parse/Parser.h" +#include "clang/Parse/DeclSpec.h" +#include "clang/Parse/Scope.h" +using namespace clang; + +Parser::Parser(Preprocessor &pp, Action &actions) + : PP(pp), Actions(actions), Diags(PP.getDiagnostics()) { + Tok.setKind(tok::eof); + CurScope = 0; + + ParenCount = BracketCount = BraceCount = 0; +} + +/// Out-of-line virtual destructor to provide home for Action class. +Action::~Action() {} + + +void Parser::Diag(SourceLocation Loc, unsigned DiagID, + const std::string &Msg) { + Diags.Report(Loc, DiagID, &Msg, 1); +} + +/// MatchRHSPunctuation - For punctuation with a LHS and RHS (e.g. '['/']'), +/// this helper function matches and consumes the specified RHS token if +/// present. If not present, it emits a diagnostic selected from RHSTok +/// (err_parse_error for kinds not listed in the switch below), followed by +/// err_matching at LHSLoc to point at the unmatched LHS token, then calls +/// SkipUntil(RHSTok) to recover. The LHS spelling used in the second +/// diagnostic is derived from RHSTok; it is not a parameter. +/// +/// Returns the value of ConsumeAnyToken() on a match, otherwise the location +/// of the token at which the mismatch was detected. +SourceLocation Parser::MatchRHSPunctuation(tok::TokenKind RHSTok, + SourceLocation LHSLoc) { + + if (Tok.getKind() == RHSTok) + return ConsumeAnyToken(); + + SourceLocation R = Tok.getLocation(); + const char *LHSName = "unknown"; + diag::kind DID = diag::err_parse_error; + switch (RHSTok) { + default: break; + case tok::r_paren : LHSName = "("; DID = diag::err_expected_rparen; break; + case tok::r_brace : LHSName = "{"; DID = diag::err_expected_rbrace; break; + case tok::r_square: LHSName = "["; DID = diag::err_expected_rsquare; break; + case tok::greater: LHSName = "<"; DID = diag::err_expected_greater; break; + } + Diag(Tok, DID); + Diag(LHSLoc, diag::err_matching, LHSName); + SkipUntil(RHSTok); + return R; +} + +/// ExpectAndConsume - The parser expects that 'ExpectedTok' is next in the +/// input. If so, it is consumed and false is returned. +/// +/// If the input is malformed, this emits the specified diagnostic. Next, if +/// SkipToTok is specified, it calls SkipUntil(SkipToTok). 
Finally, true is +/// returned. +bool Parser::ExpectAndConsume(tok::TokenKind ExpectedTok, unsigned DiagID, + const char *Msg, tok::TokenKind SkipToTok) { + if (Tok.getKind() == ExpectedTok) { + ConsumeAnyToken(); + return false; + } + + Diag(Tok, DiagID, Msg); + if (SkipToTok != tok::unknown) + SkipUntil(SkipToTok); + return true; +} + +//===----------------------------------------------------------------------===// +// Error recovery. +//===----------------------------------------------------------------------===// + +/// SkipUntil - Read tokens until we get to one of the specified tokens, then +/// consume it (unless DontConsume is true). Because we cannot guarantee that +/// the token will ever occur, this skips to the next token, or to some likely +/// good stopping point. If StopAtSemi is true, skipping will stop at a ';' +/// character. +/// +/// If SkipUntil finds one of the specified tokens, it returns true, otherwise +/// it returns false. +bool Parser::SkipUntil(const tok::TokenKind *Toks, unsigned NumToks, + bool StopAtSemi, bool DontConsume) { + // We always want this function to skip at least one token if the first token + // isn't one of Toks and if not at EOF. + bool isFirstTokenSkipped = true; + while (1) { + // If we found one of the tokens, stop and return true. + for (unsigned i = 0; i != NumToks; ++i) { + if (Tok.getKind() == Toks[i]) { + if (DontConsume) { + // Noop, don't consume the token. + } else { + ConsumeAnyToken(); + } + return true; + } + } + + switch (Tok.getKind()) { + case tok::eof: + // Ran out of tokens. + return false; + + case tok::l_paren: + // Recursively skip properly-nested parens. + ConsumeParen(); + SkipUntil(tok::r_paren, false); + break; + case tok::l_square: + // Recursively skip properly-nested square brackets. + ConsumeBracket(); + SkipUntil(tok::r_square, false); + break; + case tok::l_brace: + // Recursively skip properly-nested braces. 
+ ConsumeBrace(); + SkipUntil(tok::r_brace, false); + break; + + // Okay, we found a ']' or '}' or ')', which we think should be balanced. + // Since the user wasn't looking for this token (if they were, it would + // already be handled), this isn't balanced. If there is a LHS token at a + // higher level, we will assume that this matches the unbalanced token + // and return it. Otherwise, this is a spurious RHS token, which we skip. + case tok::r_paren: + if (ParenCount && !isFirstTokenSkipped) + return false; // Matches something. + ConsumeParen(); + break; + case tok::r_square: + if (BracketCount && !isFirstTokenSkipped) + return false; // Matches something. + ConsumeBracket(); + break; + case tok::r_brace: + if (BraceCount && !isFirstTokenSkipped) + return false; // Matches something. + ConsumeBrace(); + break; + + case tok::string_literal: + case tok::wide_string_literal: + ConsumeStringToken(); + break; + case tok::semi: + if (StopAtSemi) + return false; + // FALL THROUGH. + default: + // Skip this token. + ConsumeToken(); + break; + } + isFirstTokenSkipped = false; + } +} + +//===----------------------------------------------------------------------===// +// Scope manipulation +//===----------------------------------------------------------------------===// + +/// ScopeCache - Cache scopes to avoid malloc traffic. +/// FIXME: eliminate this static ctor +static llvm::SmallVector<Scope*, 16> ScopeCache; + +/// EnterScope - Start a new scope. +void Parser::EnterScope(unsigned ScopeFlags) { + if (!ScopeCache.empty()) { + Scope *N = ScopeCache.back(); + ScopeCache.pop_back(); + N->Init(CurScope, ScopeFlags); + CurScope = N; + } else { + CurScope = new Scope(CurScope, ScopeFlags); + } +} + +/// ExitScope - Pop a scope off the scope stack. +void Parser::ExitScope() { + assert(CurScope && "Scope imbalance!"); + + // Inform the actions module that this scope is going away. 
+ Actions.PopScope(Tok.getLocation(), CurScope); + + Scope *Old = CurScope; + CurScope = Old->getParent(); + + if (ScopeCache.size() == 16) + delete Old; + else + ScopeCache.push_back(Old); +} + + + + +//===----------------------------------------------------------------------===// +// C99 6.9: External Definitions. +//===----------------------------------------------------------------------===// + +Parser::~Parser() { + // If we still have scopes active, delete the scope tree. + delete CurScope; + + // Free the scope cache. + while (!ScopeCache.empty()) { + delete ScopeCache.back(); + ScopeCache.pop_back(); + } +} + +/// Initialize - Warm up the parser. +/// +void Parser::Initialize() { + // Prime the lexer look-ahead. + ConsumeToken(); + + // Create the global scope, install it as the current scope. + assert(CurScope == 0 && "A scope is already active?"); + EnterScope(0); + + + // Install builtin types. + // TODO: Move this someplace more useful. + { + const char *Dummy; + + //__builtin_va_list + DeclSpec DS; + bool Error = DS.SetStorageClassSpec(DeclSpec::SCS_typedef, SourceLocation(), + Dummy); + + // TODO: add a 'TST_builtin' type? + Error |= DS.SetTypeSpecType(DeclSpec::TST_int, SourceLocation(), Dummy); + assert(!Error && "Error setting up __builtin_va_list!"); + + Declarator D(DS, Declarator::FileContext); + D.SetIdentifier(PP.getIdentifierInfo("__builtin_va_list"),SourceLocation()); + Actions.ParseDeclarator(CurScope, D, 0, 0); + } + + if (Tok.getKind() == tok::eof) // Empty source file is an extension. + Diag(Tok, diag::ext_empty_source_file); +} + +/// ParseTopLevelDecl - Parse one top-level declaration, return whatever the +/// action tells us to. This returns true if the EOF was encountered. +bool Parser::ParseTopLevelDecl(DeclTy*& Result) { + Result = 0; + if (Tok.getKind() == tok::eof) return true; + + Result = ParseExternalDeclaration(); + return false; +} + +/// Finalize - Shut down the parser. 
+/// +void Parser::Finalize() { + ExitScope(); + assert(CurScope == 0 && "Scope imbalance!"); +} + +/// ParseTranslationUnit: +/// translation-unit: [C99 6.9] +/// external-declaration +/// translation-unit external-declaration +void Parser::ParseTranslationUnit() { + Initialize(); + + DeclTy *Res; + while (!ParseTopLevelDecl(Res)) + /*parse them all*/; + + Finalize(); +} + +/// ParseExternalDeclaration: +/// external-declaration: [C99 6.9] +/// function-definition [TODO] +/// declaration [TODO] +/// [EXT] ';' +/// [GNU] asm-definition +/// [GNU] __extension__ external-declaration [TODO] +/// [OBJC] objc-class-definition +/// [OBJC] objc-class-declaration +/// [OBJC] objc-alias-declaration +/// [OBJC] objc-protocol-definition +/// [OBJC] objc-method-definition +/// [OBJC] @end +/// +/// [GNU] asm-definition: +/// simple-asm-expr ';' +/// +Parser::DeclTy *Parser::ParseExternalDeclaration() { + switch (Tok.getKind()) { + case tok::semi: + Diag(Tok, diag::ext_top_level_semi); + ConsumeToken(); + // TODO: Invoke action for top-level semicolon. + return 0; + case tok::kw_asm: + ParseSimpleAsm(); + ExpectAndConsume(tok::semi, diag::err_expected_semi_after, + "top-level asm block"); + // TODO: Invoke action for top-level asm. + return 0; + case tok::at: + // @ is not a legal token unless objc is enabled, no need to check. + ParseObjCAtDirectives(); + return 0; + case tok::minus: + if (getLang().ObjC1) { + ParseObjCInstanceMethodDeclaration(); + } else { + Diag(Tok, diag::err_expected_external_declaration); + ConsumeToken(); + } + return 0; + case tok::plus: + if (getLang().ObjC1) { + ParseObjCClassMethodDeclaration(); + } else { + Diag(Tok, diag::err_expected_external_declaration); + ConsumeToken(); + } + return 0; + case tok::kw_typedef: + // A function definition cannot start with a 'typedef' keyword. + return ParseDeclaration(Declarator::FileContext); + default: + // We can't tell whether this is a function-definition or declaration yet. 
+ return ParseDeclarationOrFunctionDefinition(); + } +} + +/// ParseDeclarationOrFunctionDefinition - Parse either a function-definition or +/// a declaration. We can't tell which we have until we read up to the +/// compound-statement in function-definition. +/// +/// function-definition: [C99 6.9.1] +/// declaration-specifiers[opt] declarator declaration-list[opt] +/// compound-statement [TODO] +/// declaration: [C99 6.7] +/// declaration-specifiers init-declarator-list[opt] ';' [TODO] +/// [!C99] init-declarator-list ';' [TODO] +/// [OMP] threadprivate-directive [TODO] +/// +Parser::DeclTy *Parser::ParseDeclarationOrFunctionDefinition() { + // Parse the common declaration-specifiers piece. + DeclSpec DS; + ParseDeclarationSpecifiers(DS); + + // C99 6.7.2.3p6: Handle "struct-or-union identifier;", "enum { X };" + // declaration-specifiers init-declarator-list[opt] ';' + if (Tok.getKind() == tok::semi) { + ConsumeToken(); + return Actions.ParsedFreeStandingDeclSpec(CurScope, DS); + } + + // Parse the first declarator. + Declarator DeclaratorInfo(DS, Declarator::FileContext); + ParseDeclarator(DeclaratorInfo); + // Error parsing the declarator? + if (DeclaratorInfo.getIdentifier() == 0) { + // If so, skip until the semi-colon or a }. + SkipUntil(tok::r_brace, true); + if (Tok.getKind() == tok::semi) + ConsumeToken(); + return 0; + } + + // If the declarator is the start of a function definition, handle it. + if (Tok.getKind() == tok::equal || // int X()= -> not a function def + Tok.getKind() == tok::comma || // int X(), -> not a function def + Tok.getKind() == tok::semi || // int X(); -> not a function def + Tok.getKind() == tok::kw_asm || // int X() __asm__ -> not a fn def + Tok.getKind() == tok::kw___attribute) {// int X() __attr__ -> not a fn def + // FALL THROUGH. 
+ } else if (DeclaratorInfo.isFunctionDeclarator() && + (Tok.getKind() == tok::l_brace || // int X() {} + isDeclarationSpecifier())) { // int X(f) int f; {} + return ParseFunctionDefinition(DeclaratorInfo); + } else { + if (DeclaratorInfo.isFunctionDeclarator()) + Diag(Tok, diag::err_expected_fn_body); + else + Diag(Tok, diag::err_expected_after_declarator); + SkipUntil(tok::semi); + return 0; + } + + // Parse the init-declarator-list for a normal declaration. + return ParseInitDeclaratorListAfterFirstDeclarator(DeclaratorInfo); +} + +/// ParseFunctionDefinition - We parsed and verified that the specified +/// Declarator is well formed. If this is a K&R-style function, read the +/// parameters declaration-list, then start the compound-statement. +/// +/// declaration-specifiers[opt] declarator declaration-list[opt] +/// compound-statement [TODO] +/// +Parser::DeclTy *Parser::ParseFunctionDefinition(Declarator &D) { + const DeclaratorChunk &FnTypeInfo = D.getTypeObject(0); + assert(FnTypeInfo.Kind == DeclaratorChunk::Function && + "This isn't a function declarator!"); + const DeclaratorChunk::FunctionTypeInfo &FTI = FnTypeInfo.Fun; + + // If this declaration was formed with a K&R-style identifier list for the + // arguments, parse declarations for all of the args next. + // int foo(a,b) int a; float b; {} + if (!FTI.hasPrototype && FTI.NumArgs != 0) + ParseKNRParamDeclarations(D); + + // Enter a scope for the function body. + EnterScope(Scope::FnScope); + + // Tell the actions module that we have entered a function definition with the + // specified Declarator for the function. + DeclTy *Res = Actions.ParseStartOfFunctionDef(CurScope, D); + + + // We should have an opening brace now. + if (Tok.getKind() != tok::l_brace) { + Diag(Tok, diag::err_expected_fn_body); + + // Skip over garbage, until we get to '{'. Don't eat the '{'. + SkipUntil(tok::l_brace, true, true); + + // If we didn't find the '{', bail out. 
+ if (Tok.getKind() != tok::l_brace) { + ExitScope(); + return 0; + } + } + + // Do not enter a scope for the brace, as the arguments are in the same scope + // (the function body) as the body itself. Instead, just read the statement + // list and put it into a CompoundStmt for safe keeping. + StmtResult FnBody = ParseCompoundStatementBody(); + if (FnBody.isInvalid) { + ExitScope(); + return 0; + } + + // Leave the function body scope. + ExitScope(); + + // TODO: Pass argument information. + return Actions.ParseFunctionDefBody(Res, FnBody.Val); +} + +/// ParseKNRParamDeclarations - Parse 'declaration-list[opt]' which provides +/// types for a function with a K&R-style identifier list for arguments. +void Parser::ParseKNRParamDeclarations(Declarator &D) { + // We know that the top-level of this declarator is a function. + DeclaratorChunk::FunctionTypeInfo &FTI = D.getTypeObject(0).Fun; + + // Read all the argument declarations. + while (isDeclarationSpecifier()) { + SourceLocation DSStart = Tok.getLocation(); + + // Parse the common declaration-specifiers piece. + DeclSpec DS; + ParseDeclarationSpecifiers(DS); + + // C99 6.9.1p6: 'each declaration in the declaration list shall have at + // least one declarator'. + // NOTE: GCC just makes this an ext-warn. It's not clear what it does with + // the declarations though. It's trivial to ignore them, really hard to do + // anything else with them. + if (Tok.getKind() == tok::semi) { + Diag(DSStart, diag::err_declaration_does_not_declare_param); + ConsumeToken(); + continue; + } + + // C99 6.9.1p6: Declarations shall contain no storage-class specifiers other + // than register. 
+ if (DS.getStorageClassSpec() != DeclSpec::SCS_unspecified && + DS.getStorageClassSpec() != DeclSpec::SCS_register) { + Diag(DS.getStorageClassSpecLoc(), + diag::err_invalid_storage_class_in_func_decl); + DS.ClearStorageClassSpecs(); + } + if (DS.isThreadSpecified()) { + Diag(DS.getThreadSpecLoc(), + diag::err_invalid_storage_class_in_func_decl); + DS.ClearStorageClassSpecs(); + } + + // Parse the first declarator attached to this declspec. + Declarator ParmDeclarator(DS, Declarator::KNRTypeListContext); + ParseDeclarator(ParmDeclarator); + + // Handle the full declarator list. + while (1) { + DeclTy *AttrList; + // If attributes are present, parse them. + if (Tok.getKind() == tok::kw___attribute) + // FIXME: attach attributes too. + AttrList = ParseAttributes(); + + // Ask the actions module to compute the type for this declarator. + Action::TypeResult TR = + Actions.ParseParamDeclaratorType(CurScope, ParmDeclarator); + if (!TR.isInvalid && + // A missing identifier has already been diagnosed. + ParmDeclarator.getIdentifier()) { + + // Scan the argument list looking for the correct param to apply this + // type. + for (unsigned i = 0; ; ++i) { + // C99 6.9.1p6: those declarators shall declare only identifiers from + // the identifier list. + if (i == FTI.NumArgs) { + Diag(ParmDeclarator.getIdentifierLoc(), diag::err_no_matching_param, + ParmDeclarator.getIdentifier()->getName()); + break; + } + + if (FTI.ArgInfo[i].Ident == ParmDeclarator.getIdentifier()) { + // Reject redefinitions of parameters. + if (FTI.ArgInfo[i].TypeInfo) { + Diag(ParmDeclarator.getIdentifierLoc(), + diag::err_param_redefinition, + ParmDeclarator.getIdentifier()->getName()); + } else { + FTI.ArgInfo[i].TypeInfo = TR.Val; + } + break; + } + } + } + + // If we don't have a comma, it is either the end of the list (a ';') or + // an error, bail out. + if (Tok.getKind() != tok::comma) + break; + + // Consume the comma. + ConsumeToken(); + + // Parse the next declarator. 
+ ParmDeclarator.clear(); + ParseDeclarator(ParmDeclarator); + } + + if (Tok.getKind() == tok::semi) { + ConsumeToken(); + } else { + Diag(Tok, diag::err_parse_error); + // Skip to end of block or statement + SkipUntil(tok::semi, true); + if (Tok.getKind() == tok::semi) + ConsumeToken(); + } + } + + // The actions module must verify that all arguments were declared. +} + + +/// ParseAsmStringLiteral - This is just a normal string-literal, but is not +/// allowed to be a wide string, and is not subject to character translation. +/// +/// [GNU] asm-string-literal: +/// string-literal +/// +void Parser::ParseAsmStringLiteral() { + if (!isTokenStringLiteral()) { + Diag(Tok, diag::err_expected_string_literal); + return; + } + + ExprResult Res = ParseStringLiteralExpression(); + if (Res.isInvalid) return; + + // TODO: Diagnose: wide string literal in 'asm' +} + +/// ParseSimpleAsm +/// +/// [GNU] simple-asm-expr: +/// 'asm' '(' asm-string-literal ')' +/// +void Parser::ParseSimpleAsm() { + assert(Tok.getKind() == tok::kw_asm && "Not an asm!"); + ConsumeToken(); + + if (Tok.getKind() != tok::l_paren) { + Diag(Tok, diag::err_expected_lparen_after, "asm"); + return; + } + + SourceLocation Loc = ConsumeParen(); + + ParseAsmStringLiteral(); + + MatchRHSPunctuation(tok::r_paren, Loc); +} + diff --git a/README.txt b/README.txt new file mode 100644 index 0000000000..9ec1cc4a3d --- /dev/null +++ b/README.txt @@ -0,0 +1,171 @@ +//===----------------------------------------------------------------------===// +// C Language Family Front-end +//===----------------------------------------------------------------------===// + Chris Lattner + +I. Introduction: + + clang: noun + 1. A loud, resonant, metallic sound. + 2. The strident call of a crane or goose. + 3. C-language family front-end toolkit. + + The world needs better compiler tools, tools which are built as libraries. This + design point allows reuse of the tools in new and novel ways. 
However, building + the tools as libraries isn't enough: they must have clean APIs, be as + decoupled from each other as possible, and be easy to modify/extend. This + requires clean layering, decent design, and avoiding tying the libraries to a + specific use. Oh yeah, did I mention that we want the resultant libraries to + be as fast as possible? :) + + This front-end is built as a component of the LLVM toolkit that can be used + with the LLVM backend or independently of it. In this spirit, the API has been + carefully designed as the following components: + + libsupport - Basic support library, reused from LLVM. + libsystem - System abstraction library, reused from LLVM. + + libbasic - Diagnostics, SourceLocations, SourceBuffer abstraction, + file system caching for input source files. This depends on + libsupport and libsystem. + libast - Provides classes to represent the C AST, the C type system, + builtin functions, and various helpers for analyzing and + manipulating the AST (visitors, pretty printers, etc). This + library depends on libbasic. + + liblex - C/C++/ObjC lexing and preprocessing, identifier hash table, + pragma handling, tokens, and macros. This depends on libbasic. + libparse - C (for now) parsing and local semantic analysis. This library + invokes coarse-grained 'Actions' provided by the client to do + stuff (e.g. libsema builds ASTs). This depends on liblex. + libsema - Provides a set of parser actions to build a standardized AST + for programs. AST's are 'streamed' out a top-level declaration + at a time, allowing clients to use decl-at-a-time processing, + build up entire translation units, or even build 'whole + program' ASTs depending on how they use the APIs. This depends + on libast and libparse. + + libcodegen - Lower the AST to LLVM IR for optimization & codegen. Depends + on libast. + clang - An example driver, client of the libraries at various levels. + This depends on all these libraries, and on LLVM VMCore. 
+ + This front-end has been intentionally built as a DAG, making it easy to + reuse individual parts or replace pieces if desired. For example, to build a + preprocessor, you take the Basic and Lexer libraries. If you want an indexer, + you take those plus the Parser library and provide some actions for indexing. + If you want a refactoring, static analysis, or source-to-source compiler tool, + it makes sense to take those plus the AST building and semantic analyzer + library. Finally, if you want to use this with the LLVM backend, you'd take + these components plus the AST to LLVM lowering code. + + In the future I hope this toolkit will grow to include new and interesting + components, including a C++ front-end, ObjC support, and a whole lot of other + things. + + Finally, it should be pointed out that the goal here is to build something that + is high-quality and industrial-strength: all the obnoxious features of the C + family must be correctly supported (trigraphs, preprocessor arcana, K&R-style + prototypes, GCC/MS extensions, etc). It cannot be used if it is not 'real'. + + +II. Usage of clang driver: + + * Basic Command-Line Options: + - Help: clang --help + - Standard GCC options accepted: -E, -I*, -i*, -pedantic, -std=c90, etc. + - To make diagnostics more gcc-like: -fno-caret-diagnostics -fno-show-column + - Enable metric printing: -stats + + * -fsyntax-only is the default mode. + + * -E mode gives output nearly identical to GCC, though not all bugs in + whitespace calculation have been emulated (e.g. the number of blank lines + emitted). + + * -fsyntax-only is currently partially implemented, lacking some semantic + analysis. + + * -Eonly mode does all preprocessing, but does not print the output, useful for + timing the preprocessor. + + * -parse-print-callbacks prints almost no callbacks so far. + + * -parse-ast builds ASTs, but doesn't print them. This is most useful for + timing AST building vs -parse-noop. 
+ + * -parse-ast-print prints most expression and statement nodes, but some + minor things are missing. + + * -parse-ast-check checks that diagnostic messages that are expected are + reported and that those which are reported are expected. + +III. Current advantages over GCC: + + * Column numbers are fully tracked (no 256 col limit, no GCC-style pruning). + * All diagnostics have column numbers, include 'caret diagnostics', and they + highlight regions of interesting code (e.g. the LHS and RHS of a binop). + * Full diagnostic customization by client (can format diagnostics however they + like, e.g. in an IDE or refactoring tool) through DiagnosticClient interface. + * Built as a framework, can be reused by multiple tools. + * All languages supported linked into same library (no cc1,cc1obj, ...). + * mmap's code in read-only, does not dirty the pages like GCC (mem footprint). + * LLVM License, can be linked into non-GPL projects. + * Full diagnostic control, per diagnostic. Diagnostics are identified by ID. + * Significantly faster than GCC at semantic analysis, parsing, preprocessing + and lexing. + * Defers exposing platform-specific stuff to as late as possible, tracks use of + platform-specific features (e.g. #ifdef PPC) to allow 'portable bytecodes'. + * The lexer doesn't rely on the "lexer hack": it has no notion of scope and + does not categorize identifiers as types or variables -- this is up to the + parser to decide. + +Potential Future Features: + + * Fine grained diag control within the source (#pragma enable/disable warning). + * Better token tracking within macros? (Token came from this line, which is + a macro argument instantiated here, recursively instantiated here). + * Fast #import with a module system. + * Dependency tracking: change to header file doesn't recompile every function + that textually depends on it: recompile only those functions that need it. + + +IV. 
Missing Functionality / Improvements + +clang driver: + * Include search paths are hard-coded into the driver. + +File Manager: + * Reduce syscalls, see NOTES.txt. + +Lexer: + * Source character mapping. GCC supports ASCII and UTF-8. + See GCC options: -ftarget-charset and -ftarget-wide-charset. + * Universal character support. Experimental in GCC, enabled with + -fextended-identifiers. + * -fpreprocessed mode. + +Preprocessor: + * Know about apple header maps. + * #assert/#unassert + * #line / #file directives (currently accepted and ignored). + * MSExtension: "L#param" stringizes to a wide string literal. + * Charize extension: "#define F(o) #@o F(a)" -> 'a'. + * Consider merging the parser's expression parser into the preprocessor to + eliminate duplicate code. + * Add support for -M* + +Traditional Preprocessor: + * All. + +Parser: + * C90/K&R modes are only partially implemented. + * __extension__, __attribute__ [currently just skipped and ignored]. + * "initializers", GCC inline asm. + +Semantic Analysis: + * Perhaps 75% done. + +Code Gen: + * Mostly missing. + diff --git a/Sema/ASTStreamer.cpp b/Sema/ASTStreamer.cpp new file mode 100644 index 0000000000..1057d93281 --- /dev/null +++ b/Sema/ASTStreamer.cpp @@ -0,0 +1,111 @@ +//===--- ASTStreamer.cpp - Provide streaming interface to ASTs ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ASTStreamer interface. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/Sema/ASTStreamer.h" +#include "clang/AST/ASTContext.h" +#include "Sema.h" +#include "clang/Parse/Action.h" +#include "clang/Parse/Parser.h" +using namespace clang; + +namespace { + class ASTStreamer { + Parser P; + std::vector<Decl*> LastInGroupList; + public: + ASTStreamer(Preprocessor &pp, ASTContext &ctxt, unsigned MainFileID) + : P(pp, *new Sema(pp, ctxt, LastInGroupList)) { + pp.EnterSourceFile(MainFileID, 0, true); + + // Initialize the parser. + P.Initialize(); + } + + /// ReadTopLevelDecl - Parse and return the next top-level declaration. + Decl *ReadTopLevelDecl(); + + void PrintStats() const; + + ~ASTStreamer() { + P.Finalize(); + delete &P.getActions(); + } + }; +} + +/// ReadTopLevelDecl - Parse and return the next top-level declaration. +/// +Decl *ASTStreamer::ReadTopLevelDecl() { + Parser::DeclTy *Result; + + /// If the previous time through we read something like 'int X, Y', return + /// the next declarator. + if (!LastInGroupList.empty()) { + Result = LastInGroupList.back(); + LastInGroupList.pop_back(); + return static_cast<Decl*>(Result); + } + + do { + if (P.ParseTopLevelDecl(Result)) + return 0; // End of file. + + // If we got a null return and something *was* parsed, try again. This + // is due to a top-level semicolon, an action override, or a parse error + // skipping something. + } while (Result == 0); + + // If we parsed a declspec with multiple declarators, reverse the list and + // return the first one. 
+ if (!LastInGroupList.empty()) { + LastInGroupList.push_back((Decl*)Result); + std::reverse(LastInGroupList.begin(), LastInGroupList.end()); + Result = LastInGroupList.back(); + LastInGroupList.pop_back(); + } + + return static_cast<Decl*>(Result); +} + +void ASTStreamer::PrintStats() const { +} + +//===----------------------------------------------------------------------===// +// Public interface to the file +//===----------------------------------------------------------------------===// + +/// ASTStreamer_Init - Create an ASTStreamer with the specified preprocessor +/// and FileID. +ASTStreamerTy *clang::ASTStreamer_Init(Preprocessor &pp, ASTContext &ctxt, + unsigned MainFileID) { + return new ASTStreamer(pp, ctxt, MainFileID); +} + +/// ASTStreamer_ReadTopLevelDecl - Parse and return one top-level declaration. This +/// returns null at end of file. +Decl *clang::ASTStreamer_ReadTopLevelDecl(ASTStreamerTy *Streamer) { + return static_cast<ASTStreamer*>(Streamer)->ReadTopLevelDecl(); +} + + +/// ASTStreamer_PrintStats - Emit statistic information to stderr. +/// +void clang::ASTStreamer_PrintStats(ASTStreamerTy *Streamer) { + return static_cast<ASTStreamer*>(Streamer)->PrintStats(); +} + +/// ASTStreamer_Terminate - Gracefully shut down the streamer. +/// +void clang::ASTStreamer_Terminate(ASTStreamerTy *Streamer) { + delete static_cast<ASTStreamer*>(Streamer); +} diff --git a/Sema/Makefile b/Sema/Makefile new file mode 100644 index 0000000000..83a1effe21 --- /dev/null +++ b/Sema/Makefile @@ -0,0 +1,23 @@ +##===- clang/Sema/Makefile ---------------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file was developed by Chris Lattner and is distributed under +# the University of Illinois Open Source License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +# +# This implements the semantic analyzer and AST builder library for the +# C-Language front-end. 
+# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME := clangSEMA +BUILD_ARCHIVE = 1 +CXXFLAGS = -fno-rtti + +CPPFLAGS += -I$(PROJ_SRC_DIR)/../include + +include $(LEVEL)/Makefile.common + diff --git a/Sema/Sema.cpp b/Sema/Sema.cpp new file mode 100644 index 0000000000..64c18a80c0 --- /dev/null +++ b/Sema/Sema.cpp @@ -0,0 +1,88 @@ +//===--- Sema.cpp - AST Builder and Semantic Analysis Implementation ------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the actions class which performs semantic analysis and +// builds an AST out of a parse stream. +// +//===----------------------------------------------------------------------===// + +#include "Sema.h" +#include "clang/AST/ASTContext.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/Basic/Diagnostic.h" +using namespace clang; + +Sema::Sema(Preprocessor &pp, ASTContext &ctxt, std::vector<Decl*> &prevInGroup) + : PP(pp), Context(ctxt), CurFunctionDecl(0), LastInGroupList(prevInGroup) { +} + +//===----------------------------------------------------------------------===// +// Helper functions. 
+//===----------------------------------------------------------------------===// + +bool Sema::Diag(SourceLocation Loc, unsigned DiagID) { + PP.getDiagnostics().Report(Loc, DiagID); + return true; +} + +bool Sema::Diag(SourceLocation Loc, unsigned DiagID, const std::string &Msg) { + PP.getDiagnostics().Report(Loc, DiagID, &Msg, 1); + return true; +} + +bool Sema::Diag(SourceLocation Loc, unsigned DiagID, const std::string &Msg1, + const std::string &Msg2) { + std::string MsgArr[] = { Msg1, Msg2 }; + PP.getDiagnostics().Report(Loc, DiagID, MsgArr, 2); + return true; +} + +bool Sema::Diag(SourceLocation Loc, unsigned DiagID, SourceRange Range) { + PP.getDiagnostics().Report(Loc, DiagID, 0, 0, &Range, 1); + return true; +} + +bool Sema::Diag(SourceLocation Loc, unsigned DiagID, const std::string &Msg, + SourceRange Range) { + PP.getDiagnostics().Report(Loc, DiagID, &Msg, 1, &Range, 1); + return true; +} + +bool Sema::Diag(SourceLocation Loc, unsigned DiagID, const std::string &Msg1, + const std::string &Msg2, SourceRange Range) { + std::string MsgArr[] = { Msg1, Msg2 }; + PP.getDiagnostics().Report(Loc, DiagID, MsgArr, 2, &Range, 1); + return true; +} + +bool Sema::Diag(SourceLocation Loc, unsigned DiagID, + SourceRange R1, SourceRange R2) { + SourceRange RangeArr[] = { R1, R2 }; + PP.getDiagnostics().Report(Loc, DiagID, 0, 0, RangeArr, 2); + return true; +} + +bool Sema::Diag(SourceLocation Loc, unsigned DiagID, const std::string &Msg, + SourceRange R1, SourceRange R2) { + SourceRange RangeArr[] = { R1, R2 }; + PP.getDiagnostics().Report(Loc, DiagID, &Msg, 1, RangeArr, 2); + return true; +} + +bool Sema::Diag(SourceLocation Range, unsigned DiagID, const std::string &Msg1, + const std::string &Msg2, SourceRange R1, SourceRange R2) { + std::string MsgArr[] = { Msg1, Msg2 }; + SourceRange RangeArr[] = { R1, R2 }; + PP.getDiagnostics().Report(Range, DiagID, MsgArr, 2, RangeArr, 2); + return true; +} + +const LangOptions &Sema::getLangOptions() const { + return 
PP.getLangOptions(); +} diff --git a/Sema/Sema.h b/Sema/Sema.h new file mode 100644 index 0000000000..ac85868b8e --- /dev/null +++ b/Sema/Sema.h @@ -0,0 +1,353 @@ +//===--- Sema.h - Semantic Analysis & AST Building --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Sema class, which performs semantic analysis and +// builds ASTs. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_SEMA_H +#define LLVM_CLANG_AST_SEMA_H + +#include "clang/Parse/Action.h" +#include "llvm/ADT/DenseMap.h" +#include <vector> +#include <string> + +namespace clang { + class ASTContext; + class Preprocessor; + class Decl; + class Expr; + class VarDecl; + class ParmVarDecl; + class TypedefDecl; + class FunctionDecl; + class QualType; + class LangOptions; + class DeclaratorChunk; + class LexerToken; + class IntegerLiteral; + class ArrayType; + class LabelStmt; + +/// Sema - This implements semantic analysis and AST building for C. +class Sema : public Action { + Preprocessor &PP; + + ASTContext &Context; + + /// CurFunctionDecl - If inside of a function body, this contains a pointer to + /// the function decl for the function being parsed. + FunctionDecl *CurFunctionDecl; + + /// LastInGroupList - This vector is populated when there are multiple + /// declarators in a single decl group (e.g. "int A, B, C"). In this case, + /// all but the last decl will be entered into this. This is used by the + /// ASTStreamer. + std::vector<Decl*> &LastInGroupList; + + /// LabelMap - This is a mapping from label identifiers to the LabelStmt for + /// it (which acts like the label decl in some ways). 
Forward referenced + /// labels have a LabelStmt created for them with a null location & SubStmt. + llvm::DenseMap<IdentifierInfo*, LabelStmt*> LabelMap; +public: + Sema(Preprocessor &pp, ASTContext &ctxt, std::vector<Decl*> &prevInGroup); + + const LangOptions &getLangOptions() const; + + /// The primitive diagnostic helpers - always returns true, which simplifies + /// error handling (i.e. less code). + bool Diag(SourceLocation Loc, unsigned DiagID); + bool Diag(SourceLocation Loc, unsigned DiagID, const std::string &Msg); + bool Diag(SourceLocation Loc, unsigned DiagID, const std::string &Msg1, + const std::string &Msg2); + + /// More expressive diagnostic helpers for expressions (say that 6 times:-) + bool Diag(SourceLocation Loc, unsigned DiagID, SourceRange R1); + bool Diag(SourceLocation Loc, unsigned DiagID, + SourceRange R1, SourceRange R2); + bool Diag(SourceLocation Loc, unsigned DiagID, const std::string &Msg, + SourceRange R1); + bool Diag(SourceLocation Loc, unsigned DiagID, const std::string &Msg, + SourceRange R1, SourceRange R2); + bool Diag(SourceLocation Loc, unsigned DiagID, const std::string &Msg1, + const std::string &Msg2, SourceRange R1); + bool Diag(SourceLocation Loc, unsigned DiagID, + const std::string &Msg1, const std::string &Msg2, + SourceRange R1, SourceRange R2); + + //===--------------------------------------------------------------------===// + // Type Analysis / Processing: SemaType.cpp. + // + QualType GetTypeForDeclarator(Declarator &D, Scope *S); + + virtual TypeResult ParseTypeName(Scope *S, Declarator &D); + + virtual TypeResult ParseParamDeclaratorType(Scope *S, Declarator &D); +private: + //===--------------------------------------------------------------------===// + // Symbol table / Decl tracking callbacks: SemaDecl.cpp. 
+ // + virtual DeclTy *isTypeName(const IdentifierInfo &II, Scope *S) const; + virtual DeclTy *ParseDeclarator(Scope *S, Declarator &D, ExprTy *Init, + DeclTy *LastInGroup); + virtual DeclTy *FinalizeDeclaratorGroup(Scope *S, DeclTy *Group); + + virtual DeclTy *ParseStartOfFunctionDef(Scope *S, Declarator &D); + virtual DeclTy *ParseFunctionDefBody(DeclTy *Decl, StmtTy *Body); + virtual void PopScope(SourceLocation Loc, Scope *S); + + /// ParsedFreeStandingDeclSpec - This method is invoked when a declspec with + /// no declarator (e.g. "struct foo;") is parsed. + virtual DeclTy *ParsedFreeStandingDeclSpec(Scope *S, DeclSpec &DS); + + virtual DeclTy *ParseTag(Scope *S, unsigned TagType, TagKind TK, + SourceLocation KWLoc, IdentifierInfo *Name, + SourceLocation NameLoc, AttributeList *Attr); + virtual DeclTy *ParseField(Scope *S, DeclTy *TagDecl,SourceLocation DeclStart, + Declarator &D, ExprTy *BitfieldWidth); + virtual void ParseRecordBody(SourceLocation RecLoc, DeclTy *TagDecl, + DeclTy **Fields, unsigned NumFields); + virtual DeclTy *ParseEnumConstant(Scope *S, DeclTy *EnumDecl, + DeclTy *LastEnumConstant, + SourceLocation IdLoc, IdentifierInfo *Id, + SourceLocation EqualLoc, ExprTy *Val); + virtual void ParseEnumBody(SourceLocation EnumLoc, DeclTy *EnumDecl, + DeclTy **Elements, unsigned NumElements); +private: + /// Subroutines of ParseDeclarator()... + TypedefDecl *ParseTypedefDecl(Scope *S, Declarator &D, Decl *LastDeclarator); + TypedefDecl *MergeTypeDefDecl(TypedefDecl *New, Decl *Old); + FunctionDecl *MergeFunctionDecl(FunctionDecl *New, Decl *Old); + VarDecl *MergeVarDecl(VarDecl *New, Decl *Old); + /// AddTopLevelDecl - called after the decl has been fully processed. + /// Allows for bookkeeping and post-processing of each declaration. + void AddTopLevelDecl(Decl *current, Decl *last); + + /// More parsing and symbol table subroutines... 
+ ParmVarDecl *ParseParamDeclarator(DeclaratorChunk &FI, unsigned ArgNo, + Scope *FnBodyScope); + Decl *LookupScopedDecl(IdentifierInfo *II, unsigned NSI, SourceLocation IdLoc, + Scope *S); + Decl *LazilyCreateBuiltin(IdentifierInfo *II, unsigned ID, Scope *S); + Decl *ImplicitlyDefineFunction(SourceLocation Loc, IdentifierInfo &II, + Scope *S); + // Decl attributes - this routine is the top level dispatcher. + void HandleDeclAttributes(Decl *New, AttributeList *declspec_prefix, + AttributeList *declarator_postfix); + void HandleDeclAttribute(Decl *New, AttributeList *rawAttr); + + // HandleVectorTypeAttribute - this attribute is only applicable to + // integral and float scalars, although arrays, pointers, and function + // return values are allowed in conjunction with this construct. Aggregates + // with this attribute are invalid, even if they are of the same size as a + // corresponding scalar. + // The raw attribute should contain precisely 1 argument, the vector size + // for the variable, measured in bytes. If curType and rawAttr are well + // formed, this routine will return a new vector type. + QualType HandleVectorTypeAttribute(QualType curType, AttributeList *rawAttr); + + //===--------------------------------------------------------------------===// + // Statement Parsing Callbacks: SemaStmt.cpp. 
+public: + virtual StmtResult ParseExprStmt(ExprTy *Expr); + + virtual StmtResult ParseNullStmt(SourceLocation SemiLoc); + virtual StmtResult ParseCompoundStmt(SourceLocation L, SourceLocation R, + StmtTy **Elts, unsigned NumElts); + virtual StmtResult ParseDeclStmt(DeclTy *Decl); + virtual StmtResult ParseCaseStmt(SourceLocation CaseLoc, ExprTy *LHSVal, + SourceLocation DotDotDotLoc, ExprTy *RHSVal, + SourceLocation ColonLoc, StmtTy *SubStmt); + virtual StmtResult ParseDefaultStmt(SourceLocation DefaultLoc, + SourceLocation ColonLoc, StmtTy *SubStmt); + virtual StmtResult ParseLabelStmt(SourceLocation IdentLoc, IdentifierInfo *II, + SourceLocation ColonLoc, StmtTy *SubStmt); + virtual StmtResult ParseIfStmt(SourceLocation IfLoc, ExprTy *CondVal, + StmtTy *ThenVal, SourceLocation ElseLoc, + StmtTy *ElseVal); + virtual StmtResult ParseSwitchStmt(SourceLocation SwitchLoc, ExprTy *Cond, + StmtTy *Body); + virtual StmtResult ParseWhileStmt(SourceLocation WhileLoc, ExprTy *Cond, + StmtTy *Body); + virtual StmtResult ParseDoStmt(SourceLocation DoLoc, StmtTy *Body, + SourceLocation WhileLoc, ExprTy *Cond); + + virtual StmtResult ParseForStmt(SourceLocation ForLoc, + SourceLocation LParenLoc, + StmtTy *First, ExprTy *Second, ExprTy *Third, + SourceLocation RParenLoc, StmtTy *Body); + virtual StmtResult ParseGotoStmt(SourceLocation GotoLoc, + SourceLocation LabelLoc, + IdentifierInfo *LabelII); + virtual StmtResult ParseIndirectGotoStmt(SourceLocation GotoLoc, + SourceLocation StarLoc, + ExprTy *DestExp); + virtual StmtResult ParseContinueStmt(SourceLocation ContinueLoc, + Scope *CurScope); + virtual StmtResult ParseBreakStmt(SourceLocation GotoLoc, Scope *CurScope); + + virtual StmtResult ParseReturnStmt(SourceLocation ReturnLoc, + ExprTy *RetValExp); + + //===--------------------------------------------------------------------===// + // Expression Parsing Callbacks: SemaExpr.cpp. + + // Primary Expressions. 
+ virtual ExprResult ParseIdentifierExpr(Scope *S, SourceLocation Loc, + IdentifierInfo &II, + bool HasTrailingLParen); + virtual ExprResult ParseSimplePrimaryExpr(SourceLocation Loc, + tok::TokenKind Kind); + virtual ExprResult ParseNumericConstant(const LexerToken &); + virtual ExprResult ParseCharacterConstant(const LexerToken &); + virtual ExprResult ParseParenExpr(SourceLocation L, SourceLocation R, + ExprTy *Val); + + /// ParseStringLiteral - The specified tokens were lexed as pasted string + /// fragments (e.g. "foo" "bar" L"baz"). + virtual ExprResult ParseStringLiteral(const LexerToken *Toks, unsigned NumToks); + + // Binary/Unary Operators. 'Tok' is the token for the operator. + virtual ExprResult ParseUnaryOp(SourceLocation OpLoc, tok::TokenKind Op, + ExprTy *Input); + virtual ExprResult + ParseSizeOfAlignOfTypeExpr(SourceLocation OpLoc, bool isSizeof, + SourceLocation LParenLoc, TypeTy *Ty, + SourceLocation RParenLoc); + + virtual ExprResult ParsePostfixUnaryOp(SourceLocation OpLoc, + tok::TokenKind Kind, ExprTy *Input); + + virtual ExprResult ParseArraySubscriptExpr(ExprTy *Base, SourceLocation LLoc, + ExprTy *Idx, SourceLocation RLoc); + virtual ExprResult ParseMemberReferenceExpr(ExprTy *Base,SourceLocation OpLoc, + tok::TokenKind OpKind, + SourceLocation MemberLoc, + IdentifierInfo &Member); + + /// ParseCallExpr - Handle a call to Fn with the specified array of arguments. + /// This provides the location of the left/right parens and a list of comma + /// locations. + virtual ExprResult ParseCallExpr(ExprTy *Fn, SourceLocation LParenLoc, + ExprTy **Args, unsigned NumArgs, + SourceLocation *CommaLocs, + SourceLocation RParenLoc); + + virtual ExprResult ParseCastExpr(SourceLocation LParenLoc, TypeTy *Ty, + SourceLocation RParenLoc, ExprTy *Op); + + virtual ExprResult ParseBinOp(SourceLocation TokLoc, tok::TokenKind Kind, + ExprTy *LHS,ExprTy *RHS); + + /// ParseConditionalOp - Parse a ?: operation. 
Note that 'LHS' may be null + /// in the case of the GNU conditional expr extension. + virtual ExprResult ParseConditionalOp(SourceLocation QuestionLoc, + SourceLocation ColonLoc, + ExprTy *Cond, ExprTy *LHS, ExprTy *RHS); + + /// ParseAddrLabel - Parse the GNU address of label extension: "&&foo". + virtual ExprResult ParseAddrLabel(SourceLocation OpLoc, SourceLocation LabLoc, + IdentifierInfo *LabelII); + + /// ParseCXXCasts - Parse {dynamic,static,reinterpret,const}_cast's. + virtual ExprResult ParseCXXCasts(SourceLocation OpLoc, tok::TokenKind Kind, + SourceLocation LAngleBracketLoc, TypeTy *Ty, + SourceLocation RAngleBracketLoc, + SourceLocation LParenLoc, ExprTy *E, + SourceLocation RParenLoc); + + /// ParseCXXBoolLiteral - Parse {true,false} literals. + virtual ExprResult ParseCXXBoolLiteral(SourceLocation OpLoc, + tok::TokenKind Kind); +private: + // UsualUnaryConversions - promotes integers (C99 6.3.1.1p2) and converts + // functions and arrays to their respective pointers (C99 6.3.2.1) + QualType UsualUnaryConversions(QualType t); + // UsualArithmeticConversions - performs the UsualUnaryConversions on its + // operands and then handles various conversions that are common to binary + // operators (C99 6.3.1.8). If both operands aren't arithmetic, this + // routine returns the first non-arithmetic type found. The client is + // responsible for emitting appropriate error diagnostics. + QualType UsualArithmeticConversions(QualType &t1, QualType &t2); + // DefaultFunctionArrayConversion - converts functions and arrays + // to their respective pointers (C99 6.3.2.1). If the type isn't a function + // or array, this routine simply returns the input type (unmodified). 
+ QualType DefaultFunctionArrayConversion(QualType t); + + enum AssignmentCheckResult { + Compatible, + Incompatible, + PointerFromInt, + IntFromPointer, + IncompatiblePointer, + CompatiblePointerDiscardsQualifiers + }; + // CheckAssignmentConstraints - conversions for assignment, argument passing, + // variable initialization, and function return values. Currently used by + // CheckAssignmentOperands, ParseCallExpr, and ParseReturnStmt. C99 6.5.16. + AssignmentCheckResult CheckAssignmentConstraints(QualType lhs, QualType rhs); + // Helper function for CheckAssignmentConstraints (C99 6.5.16.1p1) + AssignmentCheckResult CheckPointerTypesForAssignment(QualType lhsType, + QualType rhsType); + + /// the following "Check" methods will return a valid/converted QualType + /// or a null QualType (indicating an error diagnostic was issued). + + /// type checking binary operators (subroutines of ParseBinOp). + inline void InvalidOperands(SourceLocation l, Expr *lex, Expr *rex); + inline QualType CheckVectorOperands(SourceLocation l, Expr *lex, Expr *rex); + inline QualType CheckMultiplyDivideOperands( // C99 6.5.5 + Expr *lex, Expr *rex, SourceLocation OpLoc); + inline QualType CheckRemainderOperands( // C99 6.5.5 + Expr *lex, Expr *rex, SourceLocation OpLoc); + inline QualType CheckAdditionOperands( // C99 6.5.6 + Expr *lex, Expr *rex, SourceLocation OpLoc); + inline QualType CheckSubtractionOperands( // C99 6.5.6 + Expr *lex, Expr *rex, SourceLocation OpLoc); + inline QualType CheckShiftOperands( // C99 6.5.7 + Expr *lex, Expr *rex, SourceLocation OpLoc); + inline QualType CheckRelationalOperands( // C99 6.5.8 + Expr *lex, Expr *rex, SourceLocation OpLoc); + inline QualType CheckEqualityOperands( // C99 6.5.9 + Expr *lex, Expr *rex, SourceLocation OpLoc); + inline QualType CheckBitwiseOperands( // C99 6.5.[10...12] + Expr *lex, Expr *rex, SourceLocation OpLoc); + inline QualType CheckLogicalOperands( // C99 6.5.[13,14] + Expr *lex, Expr *rex, SourceLocation OpLoc); + // 
CheckAssignmentOperands is used for both simple and compound assignment. + // For simple assignment, pass both expressions and a null converted type. + // For compound assignment, pass both expressions and the converted type. + inline QualType CheckAssignmentOperands( // C99 6.5.16.[1,2] + Expr *lex, Expr *rex, SourceLocation OpLoc, QualType convertedType); + inline QualType CheckCommaOperands( // C99 6.5.17 + Expr *lex, Expr *rex, SourceLocation OpLoc); + inline QualType CheckConditionalOperands( // C99 6.5.15 + Expr *cond, Expr *lhs, Expr *rhs, SourceLocation questionLoc); + + /// type checking unary operators (subroutines of ParseUnaryOp). + /// C99 6.5.3.1, 6.5.3.2, 6.5.3.4 + QualType CheckIncrementDecrementOperand(Expr *op, SourceLocation OpLoc); + QualType CheckAddressOfOperand(Expr *op, SourceLocation OpLoc); + QualType CheckIndirectionOperand(Expr *op, SourceLocation OpLoc); + QualType CheckSizeOfAlignOfOperand(QualType type, SourceLocation loc, + bool isSizeof); + + /// C99: 6.7.5p3: Used by ParseDeclarator/ParseField to make sure we have + /// a constant expression of type int with a value greater than zero. If the + /// array has an incomplete type or a valid constant size, return false, + /// otherwise emit a diagnostic and return true. + bool VerifyConstantArrayType(const ArrayType *ary, SourceLocation loc); +}; + + +} // end namespace clang + +#endif diff --git a/Sema/SemaDecl.cpp b/Sema/SemaDecl.cpp new file mode 100644 index 0000000000..9b98c4bce1 --- /dev/null +++ b/Sema/SemaDecl.cpp @@ -0,0 +1,1024 @@ +//===--- SemaDecl.cpp - Semantic Analysis for Declarations ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements semantic analysis for declarations. 
+// +//===----------------------------------------------------------------------===// + +#include "Sema.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Builtins.h" +#include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/AST/Type.h" +#include "clang/Parse/DeclSpec.h" +#include "clang/Parse/Scope.h" +#include "clang/Lex/IdentifierTable.h" +#include "clang/Basic/LangOptions.h" +#include "clang/Basic/TargetInfo.h" +#include "llvm/ADT/SmallSet.h" +using namespace clang; + +// C99: 6.7.5p3: Used by ParseDeclarator/ParseField to make sure we have +// a constant expression of type int with a value greater than zero. +bool Sema::VerifyConstantArrayType(const ArrayType *Array, + SourceLocation DeclLoc) { + const Expr *Size = Array->getSize(); + if (Size == 0) return false; // incomplete type. + + if (!Size->getType()->isIntegerType()) { + Diag(Size->getLocStart(), diag::err_array_size_non_int, + Size->getType().getAsString(), Size->getSourceRange()); + return true; + } + + // Verify that the size of the array is an integer constant expr. + SourceLocation Loc; + llvm::APSInt SizeVal(32); + if (!Size->isIntegerConstantExpr(SizeVal, &Loc)) { + // FIXME: This emits the diagnostic to enforce 6.7.2.1p8, but the message + // is wrong. It is also wrong for static variables. + // FIXME: This is also wrong for: + // int sub1(int i, char *pi) { typedef int foo[i]; + // struct bar {foo f1; int f2:3; int f3:4} *p; } + Diag(DeclLoc, diag::err_typecheck_illegal_vla, Size->getSourceRange()); + return true; + } + + // We have a constant expression with an integer type, now make sure + // value greater than zero (C99 6.7.5.2p1). + + // FIXME: This check isn't specific to static VLAs, this should be moved + // elsewhere or replicated. 'int X[-1];' inside a function should emit an + // error. 
+ if (SizeVal.isSigned()) { + llvm::APSInt Zero(SizeVal.getBitWidth()); + Zero.setIsUnsigned(false); + if (SizeVal < Zero) { + Diag(DeclLoc, diag::err_typecheck_negative_array_size, + Size->getSourceRange()); + return true; + } else if (SizeVal == 0) { + // GCC accepts zero sized static arrays. + Diag(DeclLoc, diag::err_typecheck_zero_array_size, + Size->getSourceRange()); + } + } + return false; +} + +Sema::DeclTy *Sema::isTypeName(const IdentifierInfo &II, Scope *S) const { + return dyn_cast_or_null<TypedefDecl>(II.getFETokenInfo<Decl>()); +} + +void Sema::PopScope(SourceLocation Loc, Scope *S) { + for (Scope::decl_iterator I = S->decl_begin(), E = S->decl_end(); + I != E; ++I) { + Decl *D = static_cast<Decl*>(*I); + assert(D && "This decl didn't get pushed??"); + IdentifierInfo *II = D->getIdentifier(); + if (!II) continue; + + // Unlink this decl from the identifier. Because the scope contains decls + // in an unordered collection, and because we have multiple identifier + // namespaces (e.g. tag, normal, label),the decl may not be the first entry. + if (II->getFETokenInfo<Decl>() == D) { + // Normal case, no multiple decls in different namespaces. + II->setFETokenInfo(D->getNext()); + } else { + // Scan ahead. There are only three namespaces in C, so this loop can + // never execute more than 3 times. + Decl *SomeDecl = II->getFETokenInfo<Decl>(); + while (SomeDecl->getNext() != D) { + SomeDecl = SomeDecl->getNext(); + assert(SomeDecl && "Didn't find this decl on its identifier's chain!"); + } + SomeDecl->setNext(D->getNext()); + } + + // This will have to be revisited for C++: there we want to nest stuff in + // namespace decls etc. Even for C, we might want a top-level translation + // unit decl or something. + if (!CurFunctionDecl) + continue; + + // Chain this decl to the containing function, it now owns the memory for + // the decl. 
+ D->setNext(CurFunctionDecl->getDeclChain()); + CurFunctionDecl->setDeclChain(D); + } +} + +/// LookupScopedDecl - Look up the inner-most declaration in the specified +/// namespace. +Decl *Sema::LookupScopedDecl(IdentifierInfo *II, unsigned NSI, + SourceLocation IdLoc, Scope *S) { + if (II == 0) return 0; + Decl::IdentifierNamespace NS = (Decl::IdentifierNamespace)NSI; + + // Scan up the scope chain looking for a decl that matches this identifier + // that is in the appropriate namespace. This search should not take long, as + // shadowing of names is uncommon, and deep shadowing is extremely uncommon. + for (Decl *D = II->getFETokenInfo<Decl>(); D; D = D->getNext()) + if (D->getIdentifierNamespace() == NS) + return D; + + // If we didn't find a use of this identifier, and if the identifier + // corresponds to a compiler builtin, create the decl object for the builtin + // now, injecting it into translation unit scope, and return it. + if (NS == Decl::IDNS_Ordinary) { + // If this is a builtin on some other target, or if this builtin varies + // across targets (e.g. in type), emit a diagnostic and mark the translation + // unit non-portable for using it. + if (II->isNonPortableBuiltin()) { + // Only emit this diagnostic once for this builtin. + II->setNonPortableBuiltin(false); + Context.Target.DiagnoseNonPortability(IdLoc, + diag::port_target_builtin_use); + } + // If this is a builtin on this (or all) targets, create the decl. + if (unsigned BuiltinID = II->getBuiltinID()) + return LazilyCreateBuiltin(II, BuiltinID, S); + } + return 0; +} + +/// LazilyCreateBuiltin - The specified Builtin-ID was first used at file scope. +/// lazily create a decl for it. 
+Decl *Sema::LazilyCreateBuiltin(IdentifierInfo *II, unsigned bid, Scope *S) { + Builtin::ID BID = (Builtin::ID)bid; + + QualType R = Context.BuiltinInfo.GetBuiltinType(BID, Context); + FunctionDecl *New = new FunctionDecl(SourceLocation(), II, R, + FunctionDecl::Extern, 0); + + // Find translation-unit scope to insert this function into. + while (S->getParent()) + S = S->getParent(); + S->AddDecl(New); + + // Add this decl to the end of the identifier info. + if (Decl *LastDecl = II->getFETokenInfo<Decl>()) { + // Scan until we find the last (outermost) decl in the id chain. + while (LastDecl->getNext()) + LastDecl = LastDecl->getNext(); + // Insert before (outside) it. + LastDecl->setNext(New); + } else { + II->setFETokenInfo(New); + } + // Make sure clients iterating over decls see this. + LastInGroupList.push_back(New); + + return New; +} + +/// MergeTypeDefDecl - We just parsed a typedef 'New' which has the same name +/// and scope as a previous declaration 'Old'. Figure out how to resolve this +/// situation, merging decls or emitting diagnostics as appropriate. +/// +TypedefDecl *Sema::MergeTypeDefDecl(TypedefDecl *New, Decl *OldD) { + // Verify the old decl was also a typedef. + TypedefDecl *Old = dyn_cast<TypedefDecl>(OldD); + if (!Old) { + Diag(New->getLocation(), diag::err_redefinition_different_kind, + New->getName()); + Diag(OldD->getLocation(), diag::err_previous_definition); + return New; + } + + // TODO: CHECK FOR CONFLICTS, multiple decls with same name in one scope. + // TODO: This is totally simplistic. It should handle merging functions + // together etc, merging extern int X; int X; ... + Diag(New->getLocation(), diag::err_redefinition, New->getName()); + Diag(Old->getLocation(), diag::err_previous_definition); + return New; +} + +/// MergeFunctionDecl - We just parsed a function 'New' which has the same name +/// and scope as a previous declaration 'Old'. 
Figure out how to resolve this +/// situation, merging decls or emitting diagnostics as appropriate. +/// +FunctionDecl *Sema::MergeFunctionDecl(FunctionDecl *New, Decl *OldD) { + // Verify the old decl was also a function. + FunctionDecl *Old = dyn_cast<FunctionDecl>(OldD); + if (!Old) { + Diag(New->getLocation(), diag::err_redefinition_different_kind, + New->getName()); + Diag(OldD->getLocation(), diag::err_previous_definition); + return New; + } + + // This is not right, but it's a start. If 'Old' is a function prototype with + // the same type as 'New', silently allow this. FIXME: We should link up decl + // objects here. + if (Old->getBody() == 0 && + Old->getCanonicalType() == New->getCanonicalType()) { + return New; + } + + // TODO: CHECK FOR CONFLICTS, multiple decls with same name in one scope. + // TODO: This is totally simplistic. It should handle merging functions + // together etc, merging extern int X; int X; ... + Diag(New->getLocation(), diag::err_redefinition, New->getName()); + Diag(Old->getLocation(), diag::err_previous_definition); + return New; +} + +/// MergeVarDecl - We just parsed a variable 'New' which has the same name +/// and scope as a previous declaration 'Old'. Figure out how to resolve this +/// situation, merging decls or emitting diagnostics as appropriate. +/// +/// FIXME: Need to carefully consider tentative definition rules (C99 6.9.2p2). +/// For example, we incorrectly complain about i1, i4 from C99 6.9.2p4. +/// +VarDecl *Sema::MergeVarDecl(VarDecl *New, Decl *OldD) { + // Verify the old decl was also a variable. + VarDecl *Old = dyn_cast<VarDecl>(OldD); + if (!Old) { + Diag(New->getLocation(), diag::err_redefinition_different_kind, + New->getName()); + Diag(OldD->getLocation(), diag::err_previous_definition); + return New; + } + // Verify the types match. 
+ if (Old->getCanonicalType() != New->getCanonicalType()) { + Diag(New->getLocation(), diag::err_redefinition, New->getName()); + Diag(Old->getLocation(), diag::err_previous_definition); + return New; + } + // We've verified the types match, now check if Old is "extern". + if (Old->getStorageClass() != VarDecl::Extern) { + Diag(New->getLocation(), diag::err_redefinition, New->getName()); + Diag(Old->getLocation(), diag::err_previous_definition); + } + return New; +} + +/// ParsedFreeStandingDeclSpec - This method is invoked when a declspec with +/// no declarator (e.g. "struct foo;") is parsed. +Sema::DeclTy *Sema::ParsedFreeStandingDeclSpec(Scope *S, DeclSpec &DS) { + // TODO: emit error on 'int;' or 'const enum foo;'. + // TODO: emit error on 'typedef int;' + // if (!DS.isMissingDeclaratorOk()) Diag(...); + + return 0; +} + +Sema::DeclTy * +Sema::ParseDeclarator(Scope *S, Declarator &D, ExprTy *Init, + DeclTy *lastDeclarator) { + Decl *LastDeclarator = (Decl*)lastDeclarator; + IdentifierInfo *II = D.getIdentifier(); + + // See if this is a redefinition of a variable in the same scope. + Decl *PrevDecl = LookupScopedDecl(II, Decl::IDNS_Ordinary, + D.getIdentifierLoc(), S); + if (PrevDecl && !S->isDeclScope(PrevDecl)) + PrevDecl = 0; // If in outer scope, it isn't the same thing. + + Decl *New; + if (D.getDeclSpec().getStorageClassSpec() == DeclSpec::SCS_typedef) { + TypedefDecl *NewTD = ParseTypedefDecl(S, D, LastDeclarator); + if (!NewTD) return 0; + + // Handle attributes prior to checking for duplicates in MergeTypeDefDecl + HandleDeclAttributes(NewTD, D.getDeclSpec().getAttributes(), + D.getAttributes()); + // Merge the decl with the existing one if appropriate. + if (PrevDecl) { + NewTD = MergeTypeDefDecl(NewTD, PrevDecl); + if (NewTD == 0) return 0; + } + New = NewTD; + if (S->getParent() == 0) { + // C99 6.7.7p2: If a typedef name specifies a variably modified type + // then it shall have block scope. 
+ if (ArrayType *ary = dyn_cast<ArrayType>(NewTD->getUnderlyingType())) { + if (VerifyConstantArrayType(ary, D.getIdentifierLoc())) + return 0; + } + } + } else if (D.isFunctionDeclarator()) { + QualType R = GetTypeForDeclarator(D, S); + if (R.isNull()) return 0; // FIXME: "auto func();" passes through... + + FunctionDecl::StorageClass SC; + switch (D.getDeclSpec().getStorageClassSpec()) { + default: assert(0 && "Unknown storage class!"); + case DeclSpec::SCS_auto: + case DeclSpec::SCS_register: + Diag(D.getIdentifierLoc(), diag::err_typecheck_sclass_func, + R.getAsString()); + return 0; + case DeclSpec::SCS_unspecified: SC = FunctionDecl::None; break; + case DeclSpec::SCS_extern: SC = FunctionDecl::Extern; break; + case DeclSpec::SCS_static: SC = FunctionDecl::Static; break; + } + + FunctionDecl *NewFD = new FunctionDecl(D.getIdentifierLoc(), II, R, SC, + LastDeclarator); + + // Merge the decl with the existing one if appropriate. + if (PrevDecl) { + NewFD = MergeFunctionDecl(NewFD, PrevDecl); + if (NewFD == 0) return 0; + } + New = NewFD; + } else { + QualType R = GetTypeForDeclarator(D, S); + if (R.isNull()) return 0; + + VarDecl *NewVD; + VarDecl::StorageClass SC; + switch (D.getDeclSpec().getStorageClassSpec()) { + default: assert(0 && "Unknown storage class!"); + case DeclSpec::SCS_unspecified: SC = VarDecl::None; break; + case DeclSpec::SCS_extern: SC = VarDecl::Extern; break; + case DeclSpec::SCS_static: SC = VarDecl::Static; break; + case DeclSpec::SCS_auto: SC = VarDecl::Auto; break; + case DeclSpec::SCS_register: SC = VarDecl::Register; break; + } + if (S->getParent() == 0) { + // File scope. C99 6.9.2p2: A declaration of an identifier for and + // object that has file scope without an initializer, and without a + // storage-class specifier or with the storage-class specifier "static", + // constitutes a tentative definition. Note: A tentative definition with + // external linkage is valid (C99 6.2.2p5). 
+ if (!Init && SC == VarDecl::Static) { + // C99 6.9.2p3: If the declaration of an identifier for an object is + // a tentative definition and has internal linkage (C99 6.2.2p3), the + // declared type shall not be an incomplete type. + if (R->isIncompleteType()) { + Diag(D.getIdentifierLoc(), diag::err_typecheck_decl_incomplete_type, + R.getAsString()); + return 0; + } + } + // C99 6.9p2: The storage-class specifiers auto and register shall not + // appear in the declaration specifiers in an external declaration. + if (SC == VarDecl::Auto || SC == VarDecl::Register) { + Diag(D.getIdentifierLoc(), diag::err_typecheck_sclass_fscope, + R.getAsString()); + return 0; + } + // C99 6.7.5.2p2: If an identifier is declared to be an object with + // static storage duration, it shall not have a variable length array. + if (ArrayType *ary = dyn_cast<ArrayType>(R.getCanonicalType())) { + if (VerifyConstantArrayType(ary, D.getIdentifierLoc())) + return 0; + } + NewVD = new FileVarDecl(D.getIdentifierLoc(), II, R, SC, LastDeclarator); + } else { + // Block scope. C99 6.7p7: If an identifier for an object is declared with + // no linkage (C99 6.2.2p6), the type for the object shall be complete... + if (SC != VarDecl::Extern) { + if (R->isIncompleteType()) { + Diag(D.getIdentifierLoc(), diag::err_typecheck_decl_incomplete_type, + R.getAsString()); + return 0; + } + } + if (SC == VarDecl::Static) { + // C99 6.7.5.2p2: If an identifier is declared to be an object with + // static storage duration, it shall not have a variable length array. + if (ArrayType *ary = dyn_cast<ArrayType>(R.getCanonicalType())) { + if (VerifyConstantArrayType(ary, D.getIdentifierLoc())) + return 0; + } + } + NewVD = new BlockVarDecl(D.getIdentifierLoc(), II, R, SC, LastDeclarator); + } + // Handle attributes prior to checking for duplicates in MergeVarDecl + HandleDeclAttributes(NewVD, D.getDeclSpec().getAttributes(), + D.getAttributes()); + + // Merge the decl with the existing one if appropriate. 
+ if (PrevDecl) { + NewVD = MergeVarDecl(NewVD, PrevDecl); + if (NewVD == 0) return 0; + } + New = NewVD; + } + + // If this has an identifier, add it to the scope stack. + if (II) { + New->setNext(II->getFETokenInfo<Decl>()); + II->setFETokenInfo(New); + S->AddDecl(New); + } + + if (S->getParent() == 0) + AddTopLevelDecl(New, LastDeclarator); + + return New; +} + +/// The declarators are chained together backwards, reverse the list. +Sema::DeclTy *Sema::FinalizeDeclaratorGroup(Scope *S, DeclTy *group) { + // Often we have single declarators, handle them quickly. + Decl *Group = static_cast<Decl*>(group); + if (Group == 0 || Group->getNextDeclarator() == 0) return Group; + + Decl *NewGroup = 0; + while (Group) { + Decl *Next = Group->getNextDeclarator(); + Group->setNextDeclarator(NewGroup); + NewGroup = Group; + Group = Next; + } + return NewGroup; +} + +ParmVarDecl * +Sema::ParseParamDeclarator(DeclaratorChunk &FTI, unsigned ArgNo, + Scope *FnScope) { + const DeclaratorChunk::ParamInfo &PI = FTI.Fun.ArgInfo[ArgNo]; + + IdentifierInfo *II = PI.Ident; + // TODO: CHECK FOR CONFLICTS, multiple decls with same name in one scope. + // Can this happen for params? We already checked that they don't conflict + // among each other. Here they can only shadow globals, which is ok. + if (Decl *PrevDecl = LookupScopedDecl(II, Decl::IDNS_Ordinary, + PI.IdentLoc, FnScope)) { + + } + + // FIXME: Handle storage class (auto, register). No declarator? + // TODO: Chain to previous parameter with the prevdeclarator chain? + ParmVarDecl *New = new ParmVarDecl(PI.IdentLoc, II, + QualType::getFromOpaquePtr(PI.TypeInfo), + VarDecl::None, 0); + + // If this has an identifier, add it to the scope stack. 
+ if (II) { + New->setNext(II->getFETokenInfo<Decl>()); + II->setFETokenInfo(New); + FnScope->AddDecl(New); + } + + return New; +} + + +Sema::DeclTy *Sema::ParseStartOfFunctionDef(Scope *FnBodyScope, Declarator &D) { + assert(CurFunctionDecl == 0 && "Function parsing confused"); + assert(D.getTypeObject(0).Kind == DeclaratorChunk::Function && + "Not a function declarator!"); + DeclaratorChunk::FunctionTypeInfo &FTI = D.getTypeObject(0).Fun; + + // Verify 6.9.1p6: 'every identifier in the identifier list shall be declared' + // for a K&R function. + if (!FTI.hasPrototype) { + for (unsigned i = 0, e = FTI.NumArgs; i != e; ++i) { + if (FTI.ArgInfo[i].TypeInfo == 0) { + Diag(FTI.ArgInfo[i].IdentLoc, diag::ext_param_not_declared, + FTI.ArgInfo[i].Ident->getName()); + // Implicitly declare the argument as type 'int' for lack of a better + // type. + FTI.ArgInfo[i].TypeInfo = Context.IntTy.getAsOpaquePtr(); + } + } + + // Since this is a function definition, act as though we have information + // about the arguments. + FTI.hasPrototype = true; + } else { + // FIXME: Diagnose arguments without names in C. + + } + + Scope *GlobalScope = FnBodyScope->getParent(); + + FunctionDecl *FD = + static_cast<FunctionDecl*>(ParseDeclarator(GlobalScope, D, 0, 0)); + CurFunctionDecl = FD; + + // Create Decl objects for each parameter, adding them to the FunctionDecl. + llvm::SmallVector<ParmVarDecl*, 16> Params; + + // Check for C99 6.7.5.3p10 - foo(void) is a non-varargs function that takes + // no arguments, not a function that takes a single void argument. + if (FTI.NumArgs == 1 && !FTI.isVariadic && FTI.ArgInfo[0].Ident == 0 && + FTI.ArgInfo[0].TypeInfo == Context.VoidTy.getAsOpaquePtr()) { + // empty arg list, don't push any params. 
+ } else { + for (unsigned i = 0, e = FTI.NumArgs; i != e; ++i) + Params.push_back(ParseParamDeclarator(D.getTypeObject(0), i,FnBodyScope)); + } + + FD->setParams(&Params[0], Params.size()); + + return FD; +} + +Sema::DeclTy *Sema::ParseFunctionDefBody(DeclTy *D, StmtTy *Body) { + FunctionDecl *FD = static_cast<FunctionDecl*>(D); + FD->setBody((Stmt*)Body); + + assert(FD == CurFunctionDecl && "Function parsing confused"); + CurFunctionDecl = 0; + + // Verify and clean out per-function state. + + // Check goto/label use. + for (llvm::DenseMap<IdentifierInfo*, LabelStmt*>::iterator + I = LabelMap.begin(), E = LabelMap.end(); I != E; ++I) { + // Verify that we have no forward references left. If so, there was a goto + // or address of a label taken, but no definition of it. Label fwd + // definitions are indicated with a null substmt. + if (I->second->getSubStmt() == 0) { + LabelStmt *L = I->second; + // Emit error. + Diag(L->getIdentLoc(), diag::err_undeclared_label_use, L->getName()); + + // At this point, we have gotos that use the bogus label. Stitch it into + // the function body so that they aren't leaked and that the AST is well + // formed. + L->setSubStmt(new NullStmt(L->getIdentLoc())); + cast<CompoundStmt>((Stmt*)Body)->push_back(L); + } + } + LabelMap.clear(); + + return FD; +} + + +/// ImplicitlyDefineFunction - An undeclared identifier was used in a function +/// call, forming a call to an implicitly defined function (per C99 6.5.1p2). +Decl *Sema::ImplicitlyDefineFunction(SourceLocation Loc, IdentifierInfo &II, + Scope *S) { + if (getLangOptions().C99) // Extension in C99. + Diag(Loc, diag::ext_implicit_function_decl, II.getName()); + else // Legal in C90, but warn about it. + Diag(Loc, diag::warn_implicit_function_decl, II.getName()); + + // FIXME: handle stuff like: + // void foo() { extern float X(); } + // void bar() { X(); } <-- implicit decl for X in another scope. 
+ + // Set a Declarator for the implicit definition: int foo(); + const char *Dummy; + DeclSpec DS; + bool Error = DS.SetTypeSpecType(DeclSpec::TST_int, Loc, Dummy); + Error = Error; // Silence warning. + assert(!Error && "Error setting up implicit decl!"); + Declarator D(DS, Declarator::BlockContext); + D.AddTypeInfo(DeclaratorChunk::getFunction(false, false, 0, 0, Loc)); + D.SetIdentifier(&II, Loc); + + // Find translation-unit scope to insert this function into. + while (S->getParent()) + S = S->getParent(); + + return static_cast<Decl*>(ParseDeclarator(S, D, 0, 0)); +} + + +TypedefDecl *Sema::ParseTypedefDecl(Scope *S, Declarator &D, + Decl *LastDeclarator) { + assert(D.getIdentifier() && "Wrong callback for declspec without declarator"); + + QualType T = GetTypeForDeclarator(D, S); + if (T.isNull()) return 0; + + // Scope manipulation handled by caller. + return new TypedefDecl(D.getIdentifierLoc(), D.getIdentifier(), T, + LastDeclarator); +} + + +/// ParseTag - This is invoked when we see 'struct foo' or 'struct {'. In the +/// former case, Name will be non-null. In the later case, Name will be null. +/// TagType indicates what kind of tag this is. TK indicates whether this is a +/// reference/declaration/definition of a tag. +Sema::DeclTy *Sema::ParseTag(Scope *S, unsigned TagType, TagKind TK, + SourceLocation KWLoc, IdentifierInfo *Name, + SourceLocation NameLoc, AttributeList *Attr) { + // If this is a use of an existing tag, it must have a name. + assert((Name != 0 || TK == TK_Definition) && + "Nameless record must be a definition!"); + + Decl::Kind Kind; + switch (TagType) { + default: assert(0 && "Unknown tag type!"); + case DeclSpec::TST_struct: Kind = Decl::Struct; break; + case DeclSpec::TST_union: Kind = Decl::Union; break; +//case DeclSpec::TST_class: Kind = Decl::Class; break; + case DeclSpec::TST_enum: Kind = Decl::Enum; break; + } + + // If this is a named struct, check to see if there was a previous forward + // declaration or definition. 
+ if (TagDecl *PrevDecl = + dyn_cast_or_null<TagDecl>(LookupScopedDecl(Name, Decl::IDNS_Tag, + NameLoc, S))) { + + // If this is a use of a previous tag, or if the tag is already declared in + // the same scope (so that the definition/declaration completes or + // rementions the tag), reuse the decl. + if (TK == TK_Reference || S->isDeclScope(PrevDecl)) { + // Make sure that this wasn't declared as an enum and now used as a struct + // or something similar. + if (PrevDecl->getKind() != Kind) { + Diag(KWLoc, diag::err_use_with_wrong_tag, Name->getName()); + Diag(PrevDecl->getLocation(), diag::err_previous_use); + } + + // If this is a use or a forward declaration, we're good. + if (TK != TK_Definition) + return PrevDecl; + + // Diagnose attempts to redefine a tag. + if (PrevDecl->isDefinition()) { + Diag(NameLoc, diag::err_redefinition, Name->getName()); + Diag(PrevDecl->getLocation(), diag::err_previous_definition); + // If this is a redefinition, recover by making this struct be + // anonymous, which will make any later references get the previous + // definition. + Name = 0; + } else { + // Okay, this is definition of a previously declared or referenced tag. + // Move the location of the decl to be the definition site. + PrevDecl->setLocation(NameLoc); + return PrevDecl; + } + } + // If we get here, this is a definition of a new struct type in a nested + // scope, e.g. "struct foo; void bar() { struct foo; }", just create a new + // type. + } + + // If there is an identifier, use the location of the identifier as the + // location of the decl, otherwise use the location of the struct/union + // keyword. + SourceLocation Loc = NameLoc.isValid() ? NameLoc : KWLoc; + + // Otherwise, if this is the first time we've seen this tag, create the decl. + TagDecl *New; + switch (Kind) { + default: assert(0 && "Unknown tag kind!"); + case Decl::Enum: + // FIXME: Tag decls should be chained to any simultaneous vardecls, e.g.: + // enum X { A, B, C } D; D should chain to X. 
+ New = new EnumDecl(Loc, Name, 0); + // If this is an undefined enum, warn. + if (TK != TK_Definition) Diag(Loc, diag::ext_forward_ref_enum); + break; + case Decl::Union: + case Decl::Struct: + case Decl::Class: + // FIXME: Tag decls should be chained to any simultaneous vardecls, e.g.: + // struct X { int A; } D; D should chain to X. + New = new RecordDecl(Kind, Loc, Name, 0); + break; + } + + // If this has an identifier, add it to the scope stack. + if (Name) { + New->setNext(Name->getFETokenInfo<Decl>()); + Name->setFETokenInfo(New); + S->AddDecl(New); + } + + return New; +} + +/// ParseField - Each field of a struct/union/class is passed into this in order +/// to create a FieldDecl object for it. +Sema::DeclTy *Sema::ParseField(Scope *S, DeclTy *TagDecl, + SourceLocation DeclStart, + Declarator &D, ExprTy *BitfieldWidth) { + IdentifierInfo *II = D.getIdentifier(); + Expr *BitWidth = (Expr*)BitfieldWidth; + + SourceLocation Loc = DeclStart; + if (II) Loc = D.getIdentifierLoc(); + + // FIXME: Unnamed fields can be handled in various different ways, for + // example, unnamed unions inject all members into the struct namespace! + + + if (BitWidth) { + // TODO: Validate. + //printf("WARNING: BITFIELDS IGNORED!\n"); + + // 6.7.2.1p3 + // 6.7.2.1p4 + + } else { + // Not a bitfield. + + // validate II. + + } + + QualType T = GetTypeForDeclarator(D, S); + if (T.isNull()) return 0; + + // C99 6.7.2.1p8: A member of a structure or union may have any type other + // than a variably modified type. + if (ArrayType *ary = dyn_cast<ArrayType>(T.getCanonicalType())) { + if (VerifyConstantArrayType(ary, Loc)) + return 0; + } + + // FIXME: Chain fielddecls together. 
+ return new FieldDecl(Loc, II, T, 0); +} + +void Sema::ParseRecordBody(SourceLocation RecLoc, DeclTy *RecDecl, + DeclTy **Fields, unsigned NumFields) { + RecordDecl *Record = cast<RecordDecl>(static_cast<Decl*>(RecDecl)); + if (Record->isDefinition()) { + // Diagnose code like: + // struct S { struct S {} X; }; + // We discover this when we complete the outer S. Reject and ignore the + // outer S. + Diag(Record->getLocation(), diag::err_nested_redefinition, + Record->getKindName()); + Diag(RecLoc, diag::err_previous_definition); + return; + } + + // Verify that all the fields are okay. + unsigned NumNamedMembers = 0; + llvm::SmallVector<FieldDecl*, 32> RecFields; + llvm::SmallSet<const IdentifierInfo*, 32> FieldIDs; + + for (unsigned i = 0; i != NumFields; ++i) { + FieldDecl *FD = cast_or_null<FieldDecl>(static_cast<Decl*>(Fields[i])); + if (!FD) continue; // Already issued a diagnostic. + + // Get the type for the field. + Type *FDTy = FD->getType().getCanonicalType().getTypePtr(); + + // C99 6.7.2.1p2 - A field may not be a function type. + if (isa<FunctionType>(FDTy)) { + Diag(FD->getLocation(), diag::err_field_declared_as_function, + FD->getName()); + delete FD; + continue; + } + + // C99 6.7.2.1p2 - A field may not be an incomplete type except... + if (FDTy->isIncompleteType()) { + if (i != NumFields-1 || // ... that the last member ... + Record->getKind() != Decl::Struct || // ... of a structure ... + !isa<ArrayType>(FDTy)) { //... may have incomplete array type. + Diag(FD->getLocation(), diag::err_field_incomplete, FD->getName()); + delete FD; + continue; + } + if (NumNamedMembers < 1) { //... must have more than named member ... + Diag(FD->getLocation(), diag::err_flexible_array_empty_struct, + FD->getName()); + delete FD; + continue; + } + + // Okay, we have a legal flexible array member at the end of the struct. 
+ Record->setHasFlexibleArrayMember(true); + } + + + /// C99 6.7.2.1p2 - a struct ending in a flexible array member cannot be the + /// field of another structure or the element of an array. + if (RecordType *FDTTy = dyn_cast<RecordType>(FDTy)) { + if (FDTTy->getDecl()->hasFlexibleArrayMember()) { + // If this is a member of a union, then entire union becomes "flexible". + if (Record->getKind() == Decl::Union) { + Record->setHasFlexibleArrayMember(true); + } else { + // If this is a struct/class and this is not the last element, reject + // it. Note that GCC supports variable sized arrays in the middle of + // structures. + if (i != NumFields-1) { + Diag(FD->getLocation(), diag::err_variable_sized_type_in_struct, + FD->getName()); + delete FD; + continue; + } + + // We support flexible arrays at the end of structs in other structs + // as an extension. + Diag(FD->getLocation(), diag::ext_flexible_array_in_struct, + FD->getName()); + Record->setHasFlexibleArrayMember(true); + } + } + } + + // Keep track of the number of named members. + if (IdentifierInfo *II = FD->getIdentifier()) { + // Detect duplicate member names. + if (!FieldIDs.insert(II)) { + Diag(FD->getLocation(), diag::err_duplicate_member, II->getName()); + // Find the previous decl. + SourceLocation PrevLoc; + for (unsigned i = 0, e = RecFields.size(); ; ++i) { + assert(i != e && "Didn't find previous def!"); + if (RecFields[i]->getIdentifier() == II) { + PrevLoc = RecFields[i]->getLocation(); + break; + } + } + Diag(PrevLoc, diag::err_previous_definition); + delete FD; + continue; + } + ++NumNamedMembers; + } + + // Remember good fields. + RecFields.push_back(FD); + } + + + // Okay, we successfully defined 'Record'. 
+ Record->defineBody(&RecFields[0], RecFields.size()); +} + +Sema::DeclTy *Sema::ParseEnumConstant(Scope *S, DeclTy *theEnumDecl, + DeclTy *lastEnumConst, + SourceLocation IdLoc, IdentifierInfo *Id, + SourceLocation EqualLoc, ExprTy *val) { + theEnumDecl = theEnumDecl; // silence unused warning. + EnumConstantDecl *LastEnumConst = + cast_or_null<EnumConstantDecl>(static_cast<Decl*>(lastEnumConst)); + Expr *Val = static_cast<Expr*>(val); + + // Verify that there isn't already something declared with this name in this + // scope. + if (Decl *PrevDecl = LookupScopedDecl(Id, Decl::IDNS_Ordinary, IdLoc, S)) { + if (S->isDeclScope(PrevDecl)) { + if (isa<EnumConstantDecl>(PrevDecl)) + Diag(IdLoc, diag::err_redefinition_of_enumerator, Id->getName()); + else + Diag(IdLoc, diag::err_redefinition, Id->getName()); + Diag(PrevDecl->getLocation(), diag::err_previous_definition); + // FIXME: Don't leak memory: delete Val; + return 0; + } + } + + llvm::APSInt EnumVal(32); + QualType EltTy; + if (Val) { + // C99 6.7.2.2p2: Make sure we have an integer constant expression. + SourceLocation ExpLoc; + if (!Val->isIntegerConstantExpr(EnumVal, &ExpLoc)) { + Diag(ExpLoc, diag::err_enum_value_not_integer_constant_expr, + Id->getName()); + // FIXME: Don't leak memory: delete Val; + return 0; + } + EltTy = Val->getType(); + } else if (LastEnumConst) { + // Assign the last value + 1. + EnumVal = LastEnumConst->getInitVal(); + ++EnumVal; + // FIXME: detect overflow! + EltTy = LastEnumConst->getType(); + } else { + // First value, set to zero. + EltTy = Context.IntTy; + // FIXME: Resize EnumVal to the size of int. + } + + // TODO: Default promotions to int/uint. + + // TODO: If the result value doesn't fit in an int, it must be a long or long + // long value. ISO C does not support this, but GCC does as an extension, + // emit a warning. + + EnumConstantDecl *New = new EnumConstantDecl(IdLoc, Id, EltTy, Val, EnumVal, + LastEnumConst); + + // Register this decl in the current scope stack. 
+ New->setNext(Id->getFETokenInfo<Decl>()); + Id->setFETokenInfo(New); + S->AddDecl(New); + return New; +} + +void Sema::ParseEnumBody(SourceLocation EnumLoc, DeclTy *EnumDeclX, + DeclTy **Elements, unsigned NumElements) { + EnumDecl *Enum = cast<EnumDecl>(static_cast<Decl*>(EnumDeclX)); + assert(!Enum->isDefinition() && "Enum redefinitions can't reach here"); + + // Verify that all the values are okay, and reverse the list. + EnumConstantDecl *EltList = 0; + for (unsigned i = 0; i != NumElements; ++i) { + EnumConstantDecl *ECD = + cast_or_null<EnumConstantDecl>(static_cast<Decl*>(Elements[i])); + if (!ECD) continue; // Already issued a diagnostic. + + ECD->setNextDeclarator(EltList); + EltList = ECD; + } + + Enum->defineElements(EltList); +} + +void Sema::AddTopLevelDecl(Decl *current, Decl *last) { + if (!current) return; + + // If this is a top-level decl that is chained to some other (e.g. int A,B,C;) + // remember this in the LastInGroupList list. + if (last) + LastInGroupList.push_back((Decl*)last); +} + +void Sema::HandleDeclAttribute(Decl *New, AttributeList *rawAttr) { + if (strcmp(rawAttr->getAttributeName()->getName(), "vector_size") == 0) { + if (ValueDecl *vDecl = dyn_cast<ValueDecl>(New)) { + QualType newType = HandleVectorTypeAttribute(vDecl->getType(), rawAttr); + if (!newType.isNull()) // install the new vector type into the decl + vDecl->setType(newType); + } + if (TypedefDecl *tDecl = dyn_cast<TypedefDecl>(New)) { + QualType newType = HandleVectorTypeAttribute(tDecl->getUnderlyingType(), + rawAttr); + if (!newType.isNull()) // install the new vector type into the decl + tDecl->setUnderlyingType(newType); + } + } + // FIXME: add other attributes... 
+} + +void Sema::HandleDeclAttributes(Decl *New, AttributeList *declspec_prefix, + AttributeList *declarator_postfix) { + while (declspec_prefix) { + HandleDeclAttribute(New, declspec_prefix); + declspec_prefix = declspec_prefix->getNext(); + } + while (declarator_postfix) { + HandleDeclAttribute(New, declarator_postfix); + declarator_postfix = declarator_postfix->getNext(); + } +} + +QualType Sema::HandleVectorTypeAttribute(QualType curType, + AttributeList *rawAttr) { + // check the attribute arugments. + if (rawAttr->getNumArgs() != 1) { + Diag(rawAttr->getAttributeLoc(), diag::err_attribute_wrong_number_arguments, + std::string("1")); + return QualType(); + } + Expr *sizeExpr = static_cast<Expr *>(rawAttr->getArg(0)); + llvm::APSInt vecSize(32); + if (!sizeExpr->isIntegerConstantExpr(vecSize)) { + Diag(rawAttr->getAttributeLoc(), diag::err_attribute_vector_size_not_int, + sizeExpr->getSourceRange()); + return QualType(); + } + // navigate to the base type - we need to provide for vector pointers, + // vector arrays, and functions returning vectors. + Type *canonType = curType.getCanonicalType().getTypePtr(); + + while (canonType->isPointerType() || canonType->isArrayType() || + canonType->isFunctionType()) { + if (PointerType *PT = dyn_cast<PointerType>(canonType)) + canonType = PT->getPointeeType().getTypePtr(); + else if (ArrayType *AT = dyn_cast<ArrayType>(canonType)) + canonType = AT->getElementType().getTypePtr(); + else if (FunctionType *FT = dyn_cast<FunctionType>(canonType)) + canonType = FT->getResultType().getTypePtr(); + } + // the base type must be integer or float. + if (!(canonType->isIntegerType() || canonType->isRealFloatingType())) { + Diag(rawAttr->getAttributeLoc(), diag::err_attribute_invalid_vector_type, + curType.getCanonicalType().getAsString()); + return QualType(); + } + BuiltinType *baseType = cast<BuiltinType>(canonType); + unsigned typeSize = baseType->getSize(); + // vecSize is specified in bytes - convert to bits. 
+ unsigned vectorSize = vecSize.getZExtValue() * 8; + + // the vector size needs to be an integral multiple of the type size. + if (vectorSize % typeSize) { + Diag(rawAttr->getAttributeLoc(), diag::err_attribute_invalid_size, + sizeExpr->getSourceRange()); + return QualType(); + } + if (vectorSize == 0) { + Diag(rawAttr->getAttributeLoc(), diag::err_attribute_zero_size, + sizeExpr->getSourceRange()); + return QualType(); + } + // Since OpenCU requires 3 element vectors (OpenCU 5.1.2), we don't restrict + // the number of elements to be a power of two (unlike GCC). + // Instantiate the vector type, the number of elements is > 0. + return Context.convertToVectorType(curType, vectorSize/typeSize); +} + diff --git a/Sema/SemaExpr.cpp b/Sema/SemaExpr.cpp new file mode 100644 index 0000000000..4f0935a73d --- /dev/null +++ b/Sema/SemaExpr.cpp @@ -0,0 +1,1364 @@ +//===--- SemaExpr.cpp - Semantic Analysis for Expressions -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements semantic analysis for expressions. +// +//===----------------------------------------------------------------------===// + +#include "Sema.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/Lex/LiteralSupport.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/LangOptions.h" +#include "clang/Basic/TargetInfo.h" +#include "llvm/ADT/SmallString.h" +using namespace clang; + +/// ParseStringLiteral - The specified tokens were lexed as pasted string +/// fragments (e.g. "foo" "bar" L"baz"). 
The result string has to handle string +/// concatenation ([C99 5.1.1.2, translation phase #6]), so it may come from +/// multiple tokens. However, the common case is that StringToks points to one +/// string. +/// +Action::ExprResult +Sema::ParseStringLiteral(const LexerToken *StringToks, unsigned NumStringToks) { + assert(NumStringToks && "Must have at least one string!"); + + StringLiteralParser Literal(StringToks, NumStringToks, PP, Context.Target); + if (Literal.hadError) + return ExprResult(true); + + llvm::SmallVector<SourceLocation, 4> StringTokLocs; + for (unsigned i = 0; i != NumStringToks; ++i) + StringTokLocs.push_back(StringToks[i].getLocation()); + + // FIXME: handle wchar_t + QualType t = Context.getPointerType(Context.CharTy); + + // Pass &StringTokLocs[0], StringTokLocs.size() to factory! + return new StringLiteral(Literal.GetString(), Literal.GetStringLength(), + Literal.AnyWide, t, StringToks[0].getLocation(), + StringToks[NumStringToks-1].getLocation()); +} + + +/// ParseIdentifierExpr - The parser read an identifier in expression context, +/// validate it per-C99 6.5.1. HasTrailingLParen indicates whether this +/// identifier is used in an function call context. +Sema::ExprResult Sema::ParseIdentifierExpr(Scope *S, SourceLocation Loc, + IdentifierInfo &II, + bool HasTrailingLParen) { + // Could be enum-constant or decl. + Decl *D = LookupScopedDecl(&II, Decl::IDNS_Ordinary, Loc, S); + if (D == 0) { + // Otherwise, this could be an implicitly declared function reference (legal + // in C90, extension in C99). + if (HasTrailingLParen && + // Not in C++. + !getLangOptions().CPlusPlus) + D = ImplicitlyDefineFunction(Loc, II, S); + else { + // If this name wasn't predeclared and if this is not a function call, + // diagnose the problem. 
+ return Diag(Loc, diag::err_undeclared_var_use, II.getName()); + } + } + + if (ValueDecl *VD = dyn_cast<ValueDecl>(D)) + return new DeclRefExpr(VD, VD->getType(), Loc); + if (isa<TypedefDecl>(D)) + return Diag(Loc, diag::err_unexpected_typedef, II.getName()); + + assert(0 && "Invalid decl"); +} + +Sema::ExprResult Sema::ParseSimplePrimaryExpr(SourceLocation Loc, + tok::TokenKind Kind) { + switch (Kind) { + default: + assert(0 && "Unknown simple primary expr!"); + // TODO: MOVE this to be some other callback. + case tok::kw___func__: // primary-expression: __func__ [C99 6.4.2.2] + case tok::kw___FUNCTION__: // primary-expression: __FUNCTION__ [GNU] + case tok::kw___PRETTY_FUNCTION__: // primary-expression: __P..Y_F..N__ [GNU] + return 0; + } +} + +Sema::ExprResult Sema::ParseCharacterConstant(const LexerToken &Tok) { + llvm::SmallString<16> CharBuffer; + CharBuffer.resize(Tok.getLength()); + const char *ThisTokBegin = &CharBuffer[0]; + unsigned ActualLength = PP.getSpelling(Tok, ThisTokBegin); + + CharLiteralParser Literal(ThisTokBegin, ThisTokBegin+ActualLength, + Tok.getLocation(), PP); + if (Literal.hadError()) + return ExprResult(true); + return new CharacterLiteral(Literal.getValue(), Context.IntTy, + Tok.getLocation()); +} + +Action::ExprResult Sema::ParseNumericConstant(const LexerToken &Tok) { + // fast path for a single digit (which is quite common). A single digit + // cannot have a trigraph, escaped newline, radix prefix, or type suffix. + if (Tok.getLength() == 1) { + const char *t = PP.getSourceManager().getCharacterData(Tok.getLocation()); + + unsigned IntSize = Context.Target.getIntWidth(Tok.getLocation()); + return ExprResult(new IntegerLiteral(llvm::APInt(IntSize, *t-'0'), + Context.IntTy, + Tok.getLocation())); + } + llvm::SmallString<512> IntegerBuffer; + IntegerBuffer.resize(Tok.getLength()); + const char *ThisTokBegin = &IntegerBuffer[0]; + + // Get the spelling of the token, which eliminates trigraphs, etc. 
+ unsigned ActualLength = PP.getSpelling(Tok, ThisTokBegin); + NumericLiteralParser Literal(ThisTokBegin, ThisTokBegin+ActualLength, + Tok.getLocation(), PP); + if (Literal.hadError) + return ExprResult(true); + + if (Literal.isIntegerLiteral()) { + QualType t; + + // Get the value in the widest-possible width. + llvm::APInt ResultVal(Context.Target.getIntMaxTWidth(Tok.getLocation()), 0); + + if (Literal.GetIntegerValue(ResultVal)) { + // If this value didn't fit into uintmax_t, warn and force to ull. + Diag(Tok.getLocation(), diag::warn_integer_too_large); + t = Context.UnsignedLongLongTy; + assert(Context.getIntegerBitwidth(t, Tok.getLocation()) == + ResultVal.getBitWidth() && "long long is not intmax_t?"); + } else { + // If this value fits into a ULL, try to figure out what else it fits into + // according to the rules of C99 6.4.4.1p5. + + // Octal, Hexadecimal, and integers with a U suffix are allowed to + // be an unsigned int. + bool AllowUnsigned = Literal.isUnsigned || Literal.getRadix() != 10; + + // Check from smallest to largest, picking the smallest type we can. + if (!Literal.isLong) { // Are int/unsigned possibilities? + unsigned IntSize = Context.Target.getIntWidth(Tok.getLocation()); + // Does it fit in a unsigned int? + if (ResultVal.isIntN(IntSize)) { + // Does it fit in a signed int? + if (!Literal.isUnsigned && ResultVal[IntSize-1] == 0) + t = Context.IntTy; + else if (AllowUnsigned) + t = Context.UnsignedIntTy; + } + + if (!t.isNull()) + ResultVal.trunc(IntSize); + } + + // Are long/unsigned long possibilities? + if (t.isNull() && !Literal.isLongLong) { + unsigned LongSize = Context.Target.getLongWidth(Tok.getLocation()); + + // Does it fit in a unsigned long? + if (ResultVal.isIntN(LongSize)) { + // Does it fit in a signed long? 
+ if (!Literal.isUnsigned && ResultVal[LongSize-1] == 0) + t = Context.LongTy; + else if (AllowUnsigned) + t = Context.UnsignedLongTy; + } + if (!t.isNull()) + ResultVal.trunc(LongSize); + } + + // Finally, check long long if needed. + if (t.isNull()) { + unsigned LongLongSize = + Context.Target.getLongLongWidth(Tok.getLocation()); + + // Does it fit in a unsigned long long? + if (ResultVal.isIntN(LongLongSize)) { + // Does it fit in a signed long long? + if (!Literal.isUnsigned && ResultVal[LongLongSize-1] == 0) + t = Context.LongLongTy; + else if (AllowUnsigned) + t = Context.UnsignedLongLongTy; + } + } + + // If we still couldn't decide a type, we probably have something that + // does not fit in a signed long long, but has no U suffix. + if (t.isNull()) { + Diag(Tok.getLocation(), diag::warn_integer_too_large_for_signed); + t = Context.UnsignedLongLongTy; + } + } + + return new IntegerLiteral(ResultVal, t, Tok.getLocation()); + } else if (Literal.isFloatingLiteral()) { + // FIXME: handle float values > 32 (including compute the real type...). + return new FloatingLiteral(Literal.GetFloatValue(), Context.FloatTy, + Tok.getLocation()); + } + return ExprResult(true); +} + +Action::ExprResult Sema::ParseParenExpr(SourceLocation L, SourceLocation R, + ExprTy *Val) { + Expr *e = (Expr *)Val; + assert((e != 0) && "ParseParenExpr() missing expr"); + return new ParenExpr(L, R, e); +} + +/// The UsualUnaryConversions() function is *not* called by this routine. +/// See C99 6.3.2.1p[2-4] for more details. +QualType Sema::CheckSizeOfAlignOfOperand(QualType exprType, + SourceLocation OpLoc, bool isSizeof) { + // C99 6.5.3.4p1: + if (isa<FunctionType>(exprType) && isSizeof) + // alignof(function) is allowed. + Diag(OpLoc, diag::ext_sizeof_function_type); + else if (exprType->isVoidType()) + Diag(OpLoc, diag::ext_sizeof_void_type, isSizeof ? "sizeof" : "__alignof"); + else if (exprType->isIncompleteType()) { + Diag(OpLoc, isSizeof ? 
diag::err_sizeof_incomplete_type : + diag::err_alignof_incomplete_type, + exprType.getAsString()); + return QualType(); // error + } + // C99 6.5.3.4p4: the type (an unsigned integer type) is size_t. + return Context.getSizeType(); +} + +Action::ExprResult Sema:: +ParseSizeOfAlignOfTypeExpr(SourceLocation OpLoc, bool isSizeof, + SourceLocation LPLoc, TypeTy *Ty, + SourceLocation RPLoc) { + // If error parsing type, ignore. + if (Ty == 0) return true; + + // Verify that this is a valid expression. + QualType ArgTy = QualType::getFromOpaquePtr(Ty); + + QualType resultType = CheckSizeOfAlignOfOperand(ArgTy, OpLoc, isSizeof); + + if (resultType.isNull()) + return true; + return new SizeOfAlignOfTypeExpr(isSizeof, ArgTy, resultType, OpLoc, RPLoc); +} + + +Action::ExprResult Sema::ParsePostfixUnaryOp(SourceLocation OpLoc, + tok::TokenKind Kind, + ExprTy *Input) { + UnaryOperator::Opcode Opc; + switch (Kind) { + default: assert(0 && "Unknown unary op!"); + case tok::plusplus: Opc = UnaryOperator::PostInc; break; + case tok::minusminus: Opc = UnaryOperator::PostDec; break; + } + QualType result = CheckIncrementDecrementOperand((Expr *)Input, OpLoc); + if (result.isNull()) + return true; + return new UnaryOperator((Expr *)Input, Opc, result, OpLoc); +} + +Action::ExprResult Sema:: +ParseArraySubscriptExpr(ExprTy *Base, SourceLocation LLoc, + ExprTy *Idx, SourceLocation RLoc) { + QualType t1 = ((Expr *)Base)->getType(); + QualType t2 = ((Expr *)Idx)->getType(); + + assert(!t1.isNull() && "no type for array base expression"); + assert(!t2.isNull() && "no type for array index expression"); + + QualType canonT1 = DefaultFunctionArrayConversion(t1).getCanonicalType(); + QualType canonT2 = DefaultFunctionArrayConversion(t2).getCanonicalType(); + + // C99 6.5.2.1p2: the expression e1[e2] is by definition precisely equivalent + // to the expression *((e1)+(e2)). This means the array "Base" may actually be + // in the subscript position. 
As a result, we need to derive the array base + // and index from the expression types. + + Expr *baseExpr, *indexExpr; + QualType baseType, indexType; + if (isa<PointerType>(canonT1) || isa<VectorType>(canonT1)) { + baseType = canonT1; + indexType = canonT2; + baseExpr = static_cast<Expr *>(Base); + indexExpr = static_cast<Expr *>(Idx); + } else if (isa<PointerType>(canonT2)) { // uncommon + baseType = canonT2; + indexType = canonT1; + baseExpr = static_cast<Expr *>(Idx); + indexExpr = static_cast<Expr *>(Base); + } else { + return Diag(static_cast<Expr *>(Base)->getLocStart(), + diag::err_typecheck_subscript_value, + static_cast<Expr *>(Base)->getSourceRange()); + } + // C99 6.5.2.1p1 + if (!indexType->isIntegerType()) { + return Diag(indexExpr->getLocStart(), diag::err_typecheck_subscript, + indexExpr->getSourceRange()); + } + QualType resultType; + if (PointerType *ary = dyn_cast<PointerType>(baseType)) { + // FIXME: need to deal with const... + resultType = ary->getPointeeType(); + // in practice, the following check catches trying to index a pointer + // to a function (e.g. void (*)(int)). Functions are not objects in c99. 
+ if (!resultType->isObjectType()) { + return Diag(baseExpr->getLocStart(), + diag::err_typecheck_subscript_not_object, + baseType.getAsString(), baseExpr->getSourceRange()); + } + } else if (VectorType *vec = dyn_cast<VectorType>(baseType)) + resultType = vec->getElementType(); + + return new ArraySubscriptExpr((Expr*)Base, (Expr*)Idx, resultType, RLoc); +} + +Action::ExprResult Sema:: +ParseMemberReferenceExpr(ExprTy *Base, SourceLocation OpLoc, + tok::TokenKind OpKind, SourceLocation MemberLoc, + IdentifierInfo &Member) { + QualType qualifiedType = ((Expr *)Base)->getType(); + + assert(!qualifiedType.isNull() && "no type for member expression"); + + QualType canonType = qualifiedType.getCanonicalType(); + + if (OpKind == tok::arrow) { + if (PointerType *PT = dyn_cast<PointerType>(canonType)) { + qualifiedType = PT->getPointeeType(); + canonType = qualifiedType.getCanonicalType(); + } else + return Diag(OpLoc, diag::err_typecheck_member_reference_arrow); + } + if (!isa<RecordType>(canonType)) + return Diag(OpLoc, diag::err_typecheck_member_reference_structUnion); + + // get the struct/union definition from the type. + RecordDecl *RD = cast<RecordType>(canonType)->getDecl(); + + if (canonType->isIncompleteType()) + return Diag(OpLoc, diag::err_typecheck_incomplete_tag, RD->getName()); + + FieldDecl *MemberDecl = RD->getMember(&Member); + if (!MemberDecl) + return Diag(OpLoc, diag::err_typecheck_no_member, Member.getName()); + + return new MemberExpr((Expr*)Base, OpKind == tok::arrow, + MemberDecl, MemberLoc); +} + +/// ParseCallExpr - Handle a call to Fn with the specified array of arguments. +/// This provides the location of the left/right parens and a list of comma +/// locations. 
+Action::ExprResult Sema:: +ParseCallExpr(ExprTy *Fn, SourceLocation LParenLoc, + ExprTy **Args, unsigned NumArgsInCall, + SourceLocation *CommaLocs, SourceLocation RParenLoc) { + Expr *funcExpr = (Expr *)Fn; + assert(funcExpr && "no function call expression"); + + QualType qType = UsualUnaryConversions(funcExpr->getType()); + assert(!qType.isNull() && "no type for function call expression"); + + // C99 6.5.2.2p1 - "The expression that denotes the called function shall have + // type pointer to function". + const PointerType *PT = dyn_cast<PointerType>(qType); + if (PT == 0) PT = dyn_cast<PointerType>(qType.getCanonicalType()); + + if (PT == 0) + return Diag(funcExpr->getLocStart(), diag::err_typecheck_call_not_function, + SourceRange(funcExpr->getLocStart(), RParenLoc)); + + const FunctionType *funcT = dyn_cast<FunctionType>(PT->getPointeeType()); + if (funcT == 0) + funcT = dyn_cast<FunctionType>(PT->getPointeeType().getCanonicalType()); + + if (funcT == 0) + return Diag(funcExpr->getLocStart(), diag::err_typecheck_call_not_function, + SourceRange(funcExpr->getLocStart(), RParenLoc)); + + // If a prototype isn't declared, the parser implicitly defines a func decl + QualType resultType = funcT->getResultType(); + + if (const FunctionTypeProto *proto = dyn_cast<FunctionTypeProto>(funcT)) { + // C99 6.5.2.2p7 - the arguments are implicitly converted, as if by + // assignment, to the types of the corresponding parameter, ... 
+ + unsigned NumArgsInProto = proto->getNumArgs(); + unsigned NumArgsToCheck = NumArgsInCall; + + if (NumArgsInCall < NumArgsInProto) + Diag(RParenLoc, diag::err_typecheck_call_too_few_args, + funcExpr->getSourceRange()); + else if (NumArgsInCall > NumArgsInProto) { + if (!proto->isVariadic()) { + Diag(((Expr **)Args)[NumArgsInProto+1]->getLocStart(), + diag::err_typecheck_call_too_many_args, funcExpr->getSourceRange(), + ((Expr **)Args)[NumArgsInProto+1]->getSourceRange()); + } + NumArgsToCheck = NumArgsInProto; + } + // Continue to check argument types (even if we have too few/many args). + for (unsigned i = 0; i < NumArgsToCheck; i++) { + Expr *argExpr = ((Expr **)Args)[i]; + assert(argExpr && "ParseCallExpr(): missing argument expression"); + + QualType lhsType = proto->getArgType(i); + QualType rhsType = argExpr->getType(); + + if (lhsType == rhsType) // common case, fast path... + continue; + + AssignmentCheckResult result = CheckAssignmentConstraints(lhsType, + rhsType); + SourceLocation l = argExpr->getLocStart(); + + // decode the result (notice that AST's are still created for extensions). 
+ switch (result) { + case Compatible: + break; + case PointerFromInt: + // check for null pointer constant (C99 6.3.2.3p3) + if (!argExpr->isNullPointerConstant()) { + Diag(l, diag::ext_typecheck_passing_pointer_int, + lhsType.getAsString(), rhsType.getAsString(), + funcExpr->getSourceRange(), argExpr->getSourceRange()); + } + break; + case IntFromPointer: + Diag(l, diag::ext_typecheck_passing_pointer_int, + lhsType.getAsString(), rhsType.getAsString(), + funcExpr->getSourceRange(), argExpr->getSourceRange()); + break; + case IncompatiblePointer: + Diag(l, diag::ext_typecheck_passing_incompatible_pointer, + rhsType.getAsString(), lhsType.getAsString(), + funcExpr->getSourceRange(), argExpr->getSourceRange()); + break; + case CompatiblePointerDiscardsQualifiers: + Diag(l, diag::ext_typecheck_passing_discards_qualifiers, + rhsType.getAsString(), lhsType.getAsString(), + funcExpr->getSourceRange(), argExpr->getSourceRange()); + break; + case Incompatible: + return Diag(l, diag::err_typecheck_passing_incompatible, + rhsType.getAsString(), lhsType.getAsString(), + funcExpr->getSourceRange(), argExpr->getSourceRange()); + } + } + // Even if the types checked, bail if we had the wrong number of arguments. + if ((NumArgsInCall != NumArgsInProto) && !proto->isVariadic()) + return true; + } + return new CallExpr((Expr*)Fn, (Expr**)Args, NumArgsInCall, resultType, + RParenLoc); +} + +Action::ExprResult Sema:: +ParseCastExpr(SourceLocation LParenLoc, TypeTy *Ty, + SourceLocation RParenLoc, ExprTy *Op) { + // If error parsing type, ignore. + assert((Ty != 0) && "ParseCastExpr(): missing type"); + // FIXME: Sema for cast is completely missing. 
+ return new CastExpr(QualType::getFromOpaquePtr(Ty), (Expr*)Op, LParenLoc); +} + +inline QualType Sema::CheckConditionalOperands( // C99 6.5.15 + Expr *Cond, Expr *LHS, Expr *RHS, SourceLocation questionLoc) { + QualType cond = Cond->getType(); + QualType lhs = LHS->getType(); + QualType rhs = RHS->getType(); + + assert(!cond.isNull() && "ParseConditionalOp(): no conditional type"); + assert(!lhs.isNull() && "ParseConditionalOp(): no lhs type"); + assert(!rhs.isNull() && "ParseConditionalOp(): no rhs type"); + + cond = UsualUnaryConversions(cond); + lhs = UsualUnaryConversions(lhs); + rhs = UsualUnaryConversions(rhs); + + // first, check the condition. + if (!cond->isScalarType()) { // C99 6.5.15p2 + Diag(Cond->getLocStart(), diag::err_typecheck_cond_expect_scalar, + cond.getAsString()); + return QualType(); + } + // now check the two expressions. + if (lhs->isArithmeticType() && rhs->isArithmeticType()) // C99 6.5.15p3,5 + return UsualArithmeticConversions(lhs, rhs); + + if ((lhs->isStructureType() && rhs->isStructureType()) || // C99 6.5.15p3 + (lhs->isUnionType() && rhs->isUnionType())) { + TagType *lTag = cast<TagType>(lhs.getCanonicalType()); + TagType *rTag = cast<TagType>(rhs.getCanonicalType()); + + if (lTag->getDecl()->getIdentifier() == rTag->getDecl()->getIdentifier()) + return lhs; + else { + Diag(questionLoc, diag::err_typecheck_cond_incompatible_operands, + lhs.getAsString(), rhs.getAsString(), + LHS->getSourceRange(), RHS->getSourceRange()); + return QualType(); + } + } + if (lhs->isPointerType() && RHS->isNullPointerConstant()) // C99 6.5.15p3 + return lhs; + if (rhs->isPointerType() && LHS->isNullPointerConstant()) + return rhs; + + if (lhs->isPointerType() && rhs->isPointerType()) { // C99 6.5.15p3,6 + QualType lhptee, rhptee; + + // get the "pointed to" type + lhptee = cast<PointerType>(lhs.getCanonicalType())->getPointeeType(); + rhptee = cast<PointerType>(rhs.getCanonicalType())->getPointeeType(); + + // ignore qualifiers on void (C99 
6.5.15p3, clause 6) + if (lhptee.getUnqualifiedType()->isVoidType() && + (rhptee->isObjectType() || rhptee->isIncompleteType())) + return lhs; + if (rhptee.getUnqualifiedType()->isVoidType() && + (lhptee->isObjectType() || lhptee->isIncompleteType())) + return rhs; + + // FIXME: C99 6.5.15p6: If both operands are pointers to compatible types + // *or* to differently qualified versions of compatible types, the result + // type is a pointer to an appropriately qualified version of the + // *composite* type. + if (!Type::typesAreCompatible(lhptee.getUnqualifiedType(), + rhptee.getUnqualifiedType())) { + Diag(questionLoc, diag::ext_typecheck_cond_incompatible_pointers, + lhs.getAsString(), rhs.getAsString(), + LHS->getSourceRange(), RHS->getSourceRange()); + return lhs; // FIXME: this is an _ext - is this return o.k? + } + } + if (lhs->isVoidType() && rhs->isVoidType()) // C99 6.5.15p3 + return lhs; + + Diag(questionLoc, diag::err_typecheck_cond_incompatible_operands, + lhs.getAsString(), rhs.getAsString(), + LHS->getSourceRange(), RHS->getSourceRange()); + return QualType(); +} + +/// ParseConditionalOp - Parse a ?: operation. Note that 'LHS' may be null +/// in the case of a the GNU conditional expr extension. 
+Action::ExprResult Sema::ParseConditionalOp(SourceLocation QuestionLoc, + SourceLocation ColonLoc, + ExprTy *Cond, ExprTy *LHS, + ExprTy *RHS) { + QualType result = CheckConditionalOperands((Expr *)Cond, (Expr *)LHS, + (Expr *)RHS, QuestionLoc); + if (result.isNull()) + return true; + return new ConditionalOperator((Expr*)Cond, (Expr*)LHS, (Expr*)RHS, result); +} + +inline QualType Sema::DefaultFunctionArrayConversion(QualType t) { + if (t->isFunctionType()) // C99 6.3.2.1p4 + return Context.getPointerType(t); + if (const ArrayType *ary = dyn_cast<ArrayType>(t.getCanonicalType())) + return Context.getPointerType(ary->getElementType()); // C99 6.3.2.1p3 + return t; +} + +/// UsualUnaryConversion - Performs various conversions that are common to most +/// operators (C99 6.3). The conversions of array and function types are +/// sometimes surpressed. For example, the array->pointer conversion doesn't +/// apply if the array is an argument to the sizeof or address (&) operators. +/// In these instances, this routine should *not* be called. +QualType Sema::UsualUnaryConversions(QualType t) { + assert(!t.isNull() && "UsualUnaryConversions - missing type"); + + if (t->isPromotableIntegerType()) // C99 6.3.1.1p2 + return Context.IntTy; + return DefaultFunctionArrayConversion(t); +} + +/// UsualArithmeticConversions - Performs various conversions that are common to +/// binary operators (C99 6.3.1.8). If both operands aren't arithmetic, this +/// routine returns the first non-arithmetic type found. The client is +/// responsible for emitting appropriate error diagnostics. +QualType Sema::UsualArithmeticConversions(QualType &lhs, QualType &rhs) { + lhs = UsualUnaryConversions(lhs); + rhs = UsualUnaryConversions(rhs); + + // If both types are identical, no conversion is needed. + if (lhs == rhs) + return lhs; + + // If either side is a non-arithmetic type (e.g. a pointer), we are done. + // The caller can deal with this (e.g. pointer + int). 
+ if (!lhs->isArithmeticType()) + return lhs; + if (!rhs->isArithmeticType()) + return rhs; + + // At this point, we have two different arithmetic types. + + // Handle complex types first (C99 6.3.1.8p1). + if (lhs->isComplexType() || rhs->isComplexType()) { + // if we have an integer operand, the result is the complex type. + if (rhs->isIntegerType()) + return lhs; + if (lhs->isIntegerType()) + return rhs; + + return Context.maxComplexType(lhs, rhs); + } + + // Now handle "real" floating types (i.e. float, double, long double). + if (lhs->isRealFloatingType() || rhs->isRealFloatingType()) { + // if we have an integer operand, the result is the real floating type. + if (rhs->isIntegerType()) + return lhs; + if (lhs->isIntegerType()) + return rhs; + + // we have two real floating types, float/complex combos were handled above. + return Context.maxFloatingType(lhs, rhs); + } + return Context.maxIntegerType(lhs, rhs); +} + +// CheckPointerTypesForAssignment - This is a very tricky routine (despite +// being closely modeled after the C99 spec:-). The odd characteristic of this +// routine is it effectively iqnores the qualifiers on the top level pointee. +// This circumvents the usual type rules specified in 6.2.7p1 & 6.7.5.[1-3]. +// FIXME: add a couple examples in this comment. +Sema::AssignmentCheckResult +Sema::CheckPointerTypesForAssignment(QualType lhsType, QualType rhsType) { + QualType lhptee, rhptee; + + // get the "pointed to" type (ignoring qualifiers at the top level) + lhptee = cast<PointerType>(lhsType.getCanonicalType())->getPointeeType(); + rhptee = cast<PointerType>(rhsType.getCanonicalType())->getPointeeType(); + + // make sure we operate on the canonical type + lhptee = lhptee.getCanonicalType(); + rhptee = rhptee.getCanonicalType(); + + AssignmentCheckResult r = Compatible; + + // C99 6.5.16.1p1: This following citation is common to constraints + // 3 & 4 (below). 
...and the type *pointed to* by the left has all the + // qualifiers of the type *pointed to* by the right; + if ((lhptee.getQualifiers() & rhptee.getQualifiers()) != + rhptee.getQualifiers()) + r = CompatiblePointerDiscardsQualifiers; + + // C99 6.5.16.1p1 (constraint 4): If one operand is a pointer to an object or + // incomplete type and the other is a pointer to a qualified or unqualified + // version of void... + if (lhptee.getUnqualifiedType()->isVoidType() && + (rhptee->isObjectType() || rhptee->isIncompleteType())) + ; + else if (rhptee.getUnqualifiedType()->isVoidType() && + (lhptee->isObjectType() || lhptee->isIncompleteType())) + ; + // C99 6.5.16.1p1 (constraint 3): both operands are pointers to qualified or + // unqualified versions of compatible types, ... + else if (!Type::typesAreCompatible(lhptee.getUnqualifiedType(), + rhptee.getUnqualifiedType())) + r = IncompatiblePointer; // this "trumps" PointerAssignDiscardsQualifiers + return r; +} + +/// CheckAssignmentConstraints (C99 6.5.16) - This routine currently +/// has code to accommodate several GCC extensions when type checking +/// pointers. Here are some objectionable examples that GCC considers warnings: +/// +/// int a, *pint; +/// short *pshort; +/// struct foo *pfoo; +/// +/// pint = pshort; // warning: assignment from incompatible pointer type +/// a = pint; // warning: assignment makes integer from pointer without a cast +/// pint = a; // warning: assignment makes pointer from integer without a cast +/// pint = pfoo; // warning: assignment from incompatible pointer type +/// +/// As a result, the code for dealing with pointers is more complex than the +/// C99 spec dictates. +/// Note: the warning above turn into errors when -pedantic-errors is enabled. +/// +Sema::AssignmentCheckResult +Sema::CheckAssignmentConstraints(QualType lhsType, QualType rhsType) { + // This check seems unnatural, however it is necessary to insure the proper + // conversion of functions/arrays. 
If the conversion were done for all + // DeclExpr's (created by ParseIdentifierExpr), it would mess up the unary + // expressions that surpress this implicit conversion (&, sizeof). + rhsType = DefaultFunctionArrayConversion(rhsType); + + if (lhsType->isArithmeticType() && rhsType->isArithmeticType()) { + if (lhsType->isVectorType() || rhsType->isVectorType()) { + if (lhsType.getCanonicalType() != rhsType.getCanonicalType()) + return Incompatible; + } + return Compatible; + } else if (lhsType->isPointerType()) { + if (rhsType->isIntegerType()) + return PointerFromInt; + + if (rhsType->isPointerType()) + return CheckPointerTypesForAssignment(lhsType, rhsType); + } else if (rhsType->isPointerType()) { + // C99 6.5.16.1p1: the left operand is _Bool and the right is a pointer. + if ((lhsType->isIntegerType()) && (lhsType != Context.BoolTy)) + return IntFromPointer; + + if (lhsType->isPointerType()) + return CheckPointerTypesForAssignment(lhsType, rhsType); + } else if (isa<TagType>(lhsType) && isa<TagType>(rhsType)) { + if (Type::tagTypesAreCompatible(lhsType, rhsType)) + return Compatible; + } else if (lhsType->isReferenceType() || rhsType->isReferenceType()) { + if (Type::referenceTypesAreCompatible(lhsType, rhsType)) + return Compatible; + } + return Incompatible; +} + +inline void Sema::InvalidOperands(SourceLocation loc, Expr *lex, Expr *rex) { + Diag(loc, diag::err_typecheck_invalid_operands, + lex->getType().getAsString(), rex->getType().getAsString(), + lex->getSourceRange(), rex->getSourceRange()); +} + +inline QualType Sema::CheckVectorOperands(SourceLocation loc, Expr *lex, + Expr *rex) { + QualType lhsType = lex->getType(), rhsType = rex->getType(); + + // make sure the vector types are identical. + if (lhsType == rhsType) + return lhsType; + // You cannot convert between vector values of different size. 
+ Diag(loc, diag::err_typecheck_vector_not_convertable, + lex->getType().getAsString(), rex->getType().getAsString(), + lex->getSourceRange(), rex->getSourceRange()); + return QualType(); +} + +inline QualType Sema::CheckMultiplyDivideOperands( + Expr *lex, Expr *rex, SourceLocation loc) +{ + QualType lhsType = lex->getType(), rhsType = rex->getType(); + + if (lhsType->isVectorType() || rhsType->isVectorType()) + return CheckVectorOperands(loc, lex, rex); + QualType resType = UsualArithmeticConversions(lhsType, rhsType); + + if (resType->isArithmeticType()) + return resType; + InvalidOperands(loc, lex, rex); + return QualType(); +} + +inline QualType Sema::CheckRemainderOperands( + Expr *lex, Expr *rex, SourceLocation loc) +{ + QualType lhsType = lex->getType(), rhsType = rex->getType(); + QualType resType = UsualArithmeticConversions(lhsType, rhsType); + + if (resType->isIntegerType()) + return resType; + InvalidOperands(loc, lex, rex); + return QualType(); +} + +inline QualType Sema::CheckAdditionOperands( // C99 6.5.6 + Expr *lex, Expr *rex, SourceLocation loc) +{ + QualType lhsType = lex->getType(), rhsType = rex->getType(); + + if (lhsType->isVectorType() || rhsType->isVectorType()) + return CheckVectorOperands(loc, lex, rex); + QualType resType = UsualArithmeticConversions(lhsType, rhsType); + + // handle the common case first (both operands are arithmetic). 
+ if (resType->isArithmeticType()) + return resType; + + if ((lhsType->isPointerType() && rhsType->isIntegerType()) || + (lhsType->isIntegerType() && rhsType->isPointerType())) + return resType; + InvalidOperands(loc, lex, rex); + return QualType(); +} + +inline QualType Sema::CheckSubtractionOperands( // C99 6.5.6 + Expr *lex, Expr *rex, SourceLocation loc) +{ + QualType lhsType = lex->getType(), rhsType = rex->getType(); + + if (lhsType->isVectorType() || rhsType->isVectorType()) + return CheckVectorOperands(loc, lex, rex); + QualType resType = UsualArithmeticConversions(lhsType, rhsType); + + // handle the common case first (both operands are arithmetic). + if (resType->isArithmeticType()) + return resType; + if ((lhsType->isPointerType() && rhsType->isIntegerType()) || + (lhsType->isPointerType() && rhsType->isPointerType())) + return resType; + InvalidOperands(loc, lex, rex); + return QualType(); +} + +inline QualType Sema::CheckShiftOperands( // C99 6.5.7 + Expr *lex, Expr *rex, SourceLocation loc) +{ + // FIXME: Shifts don't perform usual arithmetic conversions. This is wrong + // for int << longlong -> the result type should be int, not long long. 
+ QualType lhsType = lex->getType(), rhsType = rex->getType(); + QualType resType = UsualArithmeticConversions(lhsType, rhsType); + + if (resType->isIntegerType()) + return resType; + InvalidOperands(loc, lex, rex); + return QualType(); +} + +inline QualType Sema::CheckRelationalOperands( // C99 6.5.8 + Expr *lex, Expr *rex, SourceLocation loc) +{ + QualType lType = UsualUnaryConversions(lex->getType()); + QualType rType = UsualUnaryConversions(rex->getType()); + + if (lType->isRealType() && rType->isRealType()) + return Context.IntTy; + + if (lType->isPointerType()) { + if (rType->isPointerType()) + return Context.IntTy; + if (rType->isIntegerType()) { + if (!rex->isNullPointerConstant()) + Diag(loc, diag::ext_typecheck_comparison_of_pointer_integer, + lex->getSourceRange(), rex->getSourceRange()); + return Context.IntTy; // the previous diagnostic is a GCC extension. + } + } else if (rType->isPointerType()) { + if (lType->isIntegerType()) { + if (!lex->isNullPointerConstant()) + Diag(loc, diag::ext_typecheck_comparison_of_pointer_integer, + lex->getSourceRange(), rex->getSourceRange()); + return Context.IntTy; // the previous diagnostic is a GCC extension. + } + } + InvalidOperands(loc, lex, rex); + return QualType(); +} + +inline QualType Sema::CheckEqualityOperands( // C99 6.5.9 + Expr *lex, Expr *rex, SourceLocation loc) +{ + QualType lType = UsualUnaryConversions(lex->getType()); + QualType rType = UsualUnaryConversions(rex->getType()); + + if (lType->isArithmeticType() && rType->isArithmeticType()) + return Context.IntTy; + + if (lType->isPointerType()) { + if (rType->isPointerType()) + return Context.IntTy; + if (rType->isIntegerType()) { + if (!rex->isNullPointerConstant()) + Diag(loc, diag::ext_typecheck_comparison_of_pointer_integer, + lex->getSourceRange(), rex->getSourceRange()); + return Context.IntTy; // the previous diagnostic is a GCC extension. 
+ } + } else if (rType->isPointerType()) { + if (lType->isIntegerType()) { + if (!lex->isNullPointerConstant()) + Diag(loc, diag::ext_typecheck_comparison_of_pointer_integer, + lex->getSourceRange(), rex->getSourceRange()); + return Context.IntTy; // the previous diagnostic is a GCC extension. + } + } + InvalidOperands(loc, lex, rex); + return QualType(); +} + +inline QualType Sema::CheckBitwiseOperands( + Expr *lex, Expr *rex, SourceLocation loc) +{ + QualType lhsType = lex->getType(), rhsType = rex->getType(); + + if (lhsType->isVectorType() || rhsType->isVectorType()) + return CheckVectorOperands(loc, lex, rex); + QualType resType = UsualArithmeticConversions(lhsType, rhsType); + + if (resType->isIntegerType()) + return resType; + InvalidOperands(loc, lex, rex); + return QualType(); +} + +inline QualType Sema::CheckLogicalOperands( // C99 6.5.[13,14] + Expr *lex, Expr *rex, SourceLocation loc) +{ + QualType lhsType = UsualUnaryConversions(lex->getType()); + QualType rhsType = UsualUnaryConversions(rex->getType()); + + if (lhsType->isScalarType() || rhsType->isScalarType()) + return Context.IntTy; + InvalidOperands(loc, lex, rex); + return QualType(); +} + +inline QualType Sema::CheckAssignmentOperands( // C99 6.5.16.1 + Expr *lex, Expr *rex, SourceLocation loc, QualType compoundType) +{ + QualType lhsType = lex->getType(); + QualType rhsType = compoundType.isNull() ? 
rex->getType() : compoundType; + bool hadError = false; + Expr::isModifiableLvalueResult mlval = lex->isModifiableLvalue(); + + switch (mlval) { // C99 6.5.16p2 + case Expr::MLV_Valid: + break; + case Expr::MLV_ConstQualified: + Diag(loc, diag::err_typecheck_assign_const, lex->getSourceRange()); + hadError = true; + break; + case Expr::MLV_ArrayType: + Diag(loc, diag::err_typecheck_array_not_modifiable_lvalue, + lhsType.getAsString(), lex->getSourceRange()); + return QualType(); + case Expr::MLV_NotObjectType: + Diag(loc, diag::err_typecheck_non_object_not_modifiable_lvalue, + lhsType.getAsString(), lex->getSourceRange()); + return QualType(); + case Expr::MLV_InvalidExpression: + Diag(loc, diag::err_typecheck_expression_not_modifiable_lvalue, + lex->getSourceRange()); + return QualType(); + case Expr::MLV_IncompleteType: + case Expr::MLV_IncompleteVoidType: + Diag(loc, diag::err_typecheck_incomplete_type_not_modifiable_lvalue, + lhsType.getAsString(), lex->getSourceRange()); + return QualType(); + } + if (lhsType == rhsType) // common case, fast path... + return lhsType; + + AssignmentCheckResult result = CheckAssignmentConstraints(lhsType, rhsType); + + // decode the result (notice that extensions still return a type). 
+ switch (result) { + case Compatible: + break; + case Incompatible: + Diag(loc, diag::err_typecheck_assign_incompatible, + lhsType.getAsString(), rhsType.getAsString(), + lex->getSourceRange(), rex->getSourceRange()); + hadError = true; + break; + case PointerFromInt: + // check for null pointer constant (C99 6.3.2.3p3) + if (compoundType.isNull() && !rex->isNullPointerConstant()) { + Diag(loc, diag::ext_typecheck_assign_pointer_int, + lhsType.getAsString(), rhsType.getAsString(), + lex->getSourceRange(), rex->getSourceRange()); + } + break; + case IntFromPointer: + Diag(loc, diag::ext_typecheck_assign_pointer_int, + lhsType.getAsString(), rhsType.getAsString(), + lex->getSourceRange(), rex->getSourceRange()); + break; + case IncompatiblePointer: + Diag(loc, diag::ext_typecheck_assign_incompatible_pointer, + lhsType.getAsString(), rhsType.getAsString(), + lex->getSourceRange(), rex->getSourceRange()); + break; + case CompatiblePointerDiscardsQualifiers: + Diag(loc, diag::ext_typecheck_assign_discards_qualifiers, + lhsType.getAsString(), rhsType.getAsString(), + lex->getSourceRange(), rex->getSourceRange()); + break; + } + // C99 6.5.16p3: The type of an assignment expression is the type of the + // left operand unless the left operand has qualified type, in which case + // it is the unqualified version of the type of the left operand. + // C99 6.5.16.1p2: In simple assignment, the value of the right operand + // is converted to the type of the assignment expression (above). + // C++ 5.17p1: the type of the assignment expression is that of its left oprdu. + return hadError ? 
QualType() : lhsType.getUnqualifiedType(); +} + +inline QualType Sema::CheckCommaOperands( // C99 6.5.17 + Expr *lex, Expr *rex, SourceLocation loc) { + return UsualUnaryConversions(rex->getType()); +} + +QualType Sema::CheckIncrementDecrementOperand(Expr *op, SourceLocation OpLoc) { + QualType lhsType = op->getType(), rhsType = Context.IntTy; + QualType resType = UsualArithmeticConversions(lhsType, rhsType); + assert(!resType.isNull() && "no type for increment/decrement expression"); + + // C99 6.5.2.4p1 + if (const PointerType *pt = dyn_cast<PointerType>(resType)) { + if (!pt->getPointeeType()->isObjectType()) { // C99 6.5.2.4p2, 6.5.6p2 + Diag(OpLoc, diag::err_typecheck_arithmetic_incomplete_type, + resType.getAsString(), op->getSourceRange()); + return QualType(); + } + } else if (!resType->isRealType()) { + // FIXME: Allow Complex as a GCC extension. + Diag(OpLoc, diag::err_typecheck_illegal_increment_decrement, + resType.getAsString(), op->getSourceRange()); + return QualType(); + } + // At this point, we know we have a real or pointer type. Now make sure + // the operand is a modifiable lvalue. + Expr::isModifiableLvalueResult mlval = op->isModifiableLvalue(); + if (mlval != Expr::MLV_Valid) { + // FIXME: emit a more precise diagnostic... + Diag(OpLoc, diag::err_typecheck_invalid_lvalue_incr_decr, + op->getSourceRange()); + return QualType(); + } + return resType; +} + +/// getPrimaryDeclaration - Helper function for CheckAddressOfOperand(). +/// This routine allows us to typecheck complex/recursive expressions +/// where the declaration is needed for type checking. Here are some +/// examples: &s.xx, &s.zz[1].yy, &(1+2), &(XX), &"123"[2]. 
+static Decl *getPrimaryDeclaration(Expr *e) { + switch (e->getStmtClass()) { + case Stmt::DeclRefExprClass: + return cast<DeclRefExpr>(e)->getDecl(); + case Stmt::MemberExprClass: + return getPrimaryDeclaration(cast<MemberExpr>(e)->getBase()); + case Stmt::ArraySubscriptExprClass: + return getPrimaryDeclaration(cast<ArraySubscriptExpr>(e)->getBase()); + case Stmt::CallExprClass: + return getPrimaryDeclaration(cast<CallExpr>(e)->getCallee()); + case Stmt::UnaryOperatorClass: + return getPrimaryDeclaration(cast<UnaryOperator>(e)->getSubExpr()); + case Stmt::ParenExprClass: + return getPrimaryDeclaration(cast<ParenExpr>(e)->getSubExpr()); + default: + return 0; + } +} + +/// CheckAddressOfOperand - The operand of & must be either a function +/// designator or an lvalue designating an object. If it is an lvalue, the +/// object cannot be declared with storage class register or be a bit field. +/// Note: The usual conversions are *not* applied to the operand of the & +/// operator (C99 6.3.2.1p[2-4]), and its result is never an lvalue. +QualType Sema::CheckAddressOfOperand(Expr *op, SourceLocation OpLoc) { + Decl *dcl = getPrimaryDeclaration(op); + Expr::isLvalueResult lval = op->isLvalue(); + + if (lval != Expr::LV_Valid) { // C99 6.5.3.2p1 + if (dcl && isa<FunctionDecl>(dcl)) // allow function designators + ; + else { // FIXME: emit more specific diag... + Diag(OpLoc, diag::err_typecheck_invalid_lvalue_addrof, + op->getSourceRange()); + return QualType(); + } + } else if (dcl) { + // We have an lvalue with a decl. Make sure the decl is not declared + // with the register storage-class specifier. + if (const VarDecl *vd = dyn_cast<VarDecl>(dcl)) { + if (vd->getStorageClass() == VarDecl::Register) { + Diag(OpLoc, diag::err_typecheck_address_of_register, + op->getSourceRange()); + return QualType(); + } + } else + assert(0 && "Unknown/unexpected decl type"); + + // FIXME: add check for bitfields! 
+ } + // If the operand has type "type", the result has type "pointer to type". + return Context.getPointerType(op->getType()); +} + +QualType Sema::CheckIndirectionOperand(Expr *op, SourceLocation OpLoc) { + QualType qType = UsualUnaryConversions(op->getType()); + + assert(!qType.isNull() && "no type for * expression"); + + if (PointerType *PT = dyn_cast<PointerType>(qType.getCanonicalType())) { + QualType ptype = PT->getPointeeType(); + // C99 6.5.3.2p4. "if it points to an object,...". + if (ptype->isIncompleteType()) { // An incomplete type is not an object + // GCC compat: special case 'void *' (treat as warning). + if (ptype->isVoidType()) { + Diag(OpLoc, diag::ext_typecheck_deref_ptr_to_void, + qType.getAsString(), op->getSourceRange()); + } else { + Diag(OpLoc, diag::err_typecheck_deref_incomplete_type, + ptype.getAsString(), op->getSourceRange()); + return QualType(); + } + } + return ptype; + } + Diag(OpLoc, diag::err_typecheck_indirection_requires_pointer, + qType.getAsString(), op->getSourceRange()); + return QualType(); +} + +static inline BinaryOperator::Opcode ConvertTokenKindToBinaryOpcode( + tok::TokenKind Kind) { + BinaryOperator::Opcode Opc; + switch (Kind) { + default: assert(0 && "Unknown binop!"); + case tok::star: Opc = BinaryOperator::Mul; break; + case tok::slash: Opc = BinaryOperator::Div; break; + case tok::percent: Opc = BinaryOperator::Rem; break; + case tok::plus: Opc = BinaryOperator::Add; break; + case tok::minus: Opc = BinaryOperator::Sub; break; + case tok::lessless: Opc = BinaryOperator::Shl; break; + case tok::greatergreater: Opc = BinaryOperator::Shr; break; + case tok::lessequal: Opc = BinaryOperator::LE; break; + case tok::less: Opc = BinaryOperator::LT; break; + case tok::greaterequal: Opc = BinaryOperator::GE; break; + case tok::greater: Opc = BinaryOperator::GT; break; + case tok::exclaimequal: Opc = BinaryOperator::NE; break; + case tok::equalequal: Opc = BinaryOperator::EQ; break; + case tok::amp: Opc = 
BinaryOperator::And; break; + case tok::caret: Opc = BinaryOperator::Xor; break; + case tok::pipe: Opc = BinaryOperator::Or; break; + case tok::ampamp: Opc = BinaryOperator::LAnd; break; + case tok::pipepipe: Opc = BinaryOperator::LOr; break; + case tok::equal: Opc = BinaryOperator::Assign; break; + case tok::starequal: Opc = BinaryOperator::MulAssign; break; + case tok::slashequal: Opc = BinaryOperator::DivAssign; break; + case tok::percentequal: Opc = BinaryOperator::RemAssign; break; + case tok::plusequal: Opc = BinaryOperator::AddAssign; break; + case tok::minusequal: Opc = BinaryOperator::SubAssign; break; + case tok::lesslessequal: Opc = BinaryOperator::ShlAssign; break; + case tok::greatergreaterequal: Opc = BinaryOperator::ShrAssign; break; + case tok::ampequal: Opc = BinaryOperator::AndAssign; break; + case tok::caretequal: Opc = BinaryOperator::XorAssign; break; + case tok::pipeequal: Opc = BinaryOperator::OrAssign; break; + case tok::comma: Opc = BinaryOperator::Comma; break; + } + return Opc; +} + +static inline UnaryOperator::Opcode ConvertTokenKindToUnaryOpcode( + tok::TokenKind Kind) { + UnaryOperator::Opcode Opc; + switch (Kind) { + default: assert(0 && "Unknown unary op!"); + case tok::plusplus: Opc = UnaryOperator::PreInc; break; + case tok::minusminus: Opc = UnaryOperator::PreDec; break; + case tok::amp: Opc = UnaryOperator::AddrOf; break; + case tok::star: Opc = UnaryOperator::Deref; break; + case tok::plus: Opc = UnaryOperator::Plus; break; + case tok::minus: Opc = UnaryOperator::Minus; break; + case tok::tilde: Opc = UnaryOperator::Not; break; + case tok::exclaim: Opc = UnaryOperator::LNot; break; + case tok::kw_sizeof: Opc = UnaryOperator::SizeOf; break; + case tok::kw___alignof: Opc = UnaryOperator::AlignOf; break; + case tok::kw___real: Opc = UnaryOperator::Real; break; + case tok::kw___imag: Opc = UnaryOperator::Imag; break; + case tok::kw___extension__: Opc = UnaryOperator::Extension; break; + } + return Opc; +} + +// Binary Operators. 
'Tok' is the token for the operator. +Action::ExprResult Sema::ParseBinOp(SourceLocation TokLoc, tok::TokenKind Kind, + ExprTy *LHS, ExprTy *RHS) { + BinaryOperator::Opcode Opc = ConvertTokenKindToBinaryOpcode(Kind); + Expr *lhs = (Expr *)LHS, *rhs = (Expr*)RHS; + + assert((lhs != 0) && "ParseBinOp(): missing left expression"); + assert((rhs != 0) && "ParseBinOp(): missing right expression"); + + QualType ResultTy; // Result type of the binary operator. + QualType CompTy; // Computation type for compound assignments (e.g. '+=') + + switch (Opc) { + default: + assert(0 && "Unknown binary expr!"); + case BinaryOperator::Assign: + ResultTy = CheckAssignmentOperands(lhs, rhs, TokLoc, QualType()); + break; + case BinaryOperator::Mul: + case BinaryOperator::Div: + ResultTy = CheckMultiplyDivideOperands(lhs, rhs, TokLoc); + break; + case BinaryOperator::Rem: + ResultTy = CheckRemainderOperands(lhs, rhs, TokLoc); + break; + case BinaryOperator::Add: + ResultTy = CheckAdditionOperands(lhs, rhs, TokLoc); + break; + case BinaryOperator::Sub: + ResultTy = CheckSubtractionOperands(lhs, rhs, TokLoc); + break; + case BinaryOperator::Shl: + case BinaryOperator::Shr: + ResultTy = CheckShiftOperands(lhs, rhs, TokLoc); + break; + case BinaryOperator::LE: + case BinaryOperator::LT: + case BinaryOperator::GE: + case BinaryOperator::GT: + ResultTy = CheckRelationalOperands(lhs, rhs, TokLoc); + break; + case BinaryOperator::EQ: + case BinaryOperator::NE: + ResultTy = CheckEqualityOperands(lhs, rhs, TokLoc); + break; + case BinaryOperator::And: + case BinaryOperator::Xor: + case BinaryOperator::Or: + ResultTy = CheckBitwiseOperands(lhs, rhs, TokLoc); + break; + case BinaryOperator::LAnd: + case BinaryOperator::LOr: + ResultTy = CheckLogicalOperands(lhs, rhs, TokLoc); + break; + case BinaryOperator::MulAssign: + case BinaryOperator::DivAssign: + CompTy = CheckMultiplyDivideOperands(lhs, rhs, TokLoc); + if (!CompTy.isNull()) + ResultTy = CheckAssignmentOperands(lhs, rhs, TokLoc, CompTy); + 
break; + case BinaryOperator::RemAssign: + CompTy = CheckRemainderOperands(lhs, rhs, TokLoc); + if (!CompTy.isNull()) + ResultTy = CheckAssignmentOperands(lhs, rhs, TokLoc, CompTy); + break; + case BinaryOperator::AddAssign: + CompTy = CheckAdditionOperands(lhs, rhs, TokLoc); + if (!CompTy.isNull()) + ResultTy = CheckAssignmentOperands(lhs, rhs, TokLoc, CompTy); + break; + case BinaryOperator::SubAssign: + CompTy = CheckSubtractionOperands(lhs, rhs, TokLoc); + if (!CompTy.isNull()) + ResultTy = CheckAssignmentOperands(lhs, rhs, TokLoc, CompTy); + break; + case BinaryOperator::ShlAssign: + case BinaryOperator::ShrAssign: + CompTy = CheckShiftOperands(lhs, rhs, TokLoc); + if (!CompTy.isNull()) + ResultTy = CheckAssignmentOperands(lhs, rhs, TokLoc, CompTy); + break; + case BinaryOperator::AndAssign: + case BinaryOperator::XorAssign: + case BinaryOperator::OrAssign: + CompTy = CheckBitwiseOperands(lhs, rhs, TokLoc); + if (!CompTy.isNull()) + ResultTy = CheckAssignmentOperands(lhs, rhs, TokLoc, CompTy); + break; + case BinaryOperator::Comma: + ResultTy = CheckCommaOperands(lhs, rhs, TokLoc); + break; + } + if (ResultTy.isNull()) + return true; + if (CompTy.isNull()) + return new BinaryOperator(lhs, rhs, Opc, ResultTy); + else + return new CompoundAssignOperator(lhs, rhs, Opc, ResultTy, CompTy); +} + +// Unary Operators. 'Tok' is the token for the operator. 
+Action::ExprResult Sema::ParseUnaryOp(SourceLocation OpLoc, tok::TokenKind Op, + ExprTy *input) { + Expr *Input = (Expr*)input; + UnaryOperator::Opcode Opc = ConvertTokenKindToUnaryOpcode(Op); + QualType resultType; + switch (Opc) { + default: + assert(0 && "Unimplemented unary expr!"); + case UnaryOperator::PreInc: + case UnaryOperator::PreDec: + resultType = CheckIncrementDecrementOperand(Input, OpLoc); + break; + case UnaryOperator::AddrOf: + resultType = CheckAddressOfOperand(Input, OpLoc); + break; + case UnaryOperator::Deref: + resultType = CheckIndirectionOperand(Input, OpLoc); + break; + case UnaryOperator::Plus: + case UnaryOperator::Minus: + resultType = UsualUnaryConversions(Input->getType()); + if (!resultType->isArithmeticType()) // C99 6.5.3.3p1 + return Diag(OpLoc, diag::err_typecheck_unary_expr, + resultType.getAsString()); + break; + case UnaryOperator::Not: // bitwise complement + if (Input->getType()->isVectorType()) + resultType = Input->getType(); + else { + resultType = UsualUnaryConversions(Input->getType()); + if (!resultType->isIntegerType()) // C99 6.5.3.3p1 + return Diag(OpLoc, diag::err_typecheck_unary_expr, + resultType.getAsString()); + } + break; + case UnaryOperator::LNot: // logical negation + // Unlike +/-/~, integer promotions aren't done here (C99 6.5.3.3p5). + resultType = DefaultFunctionArrayConversion(Input->getType()); + if (!resultType->isScalarType()) // C99 6.5.3.3p1 + return Diag(OpLoc, diag::err_typecheck_unary_expr, + resultType.getAsString()); + // LNot always has type int. C99 6.5.3.3p5. + resultType = Context.IntTy; + break; + case UnaryOperator::SizeOf: + resultType = CheckSizeOfAlignOfOperand(Input->getType(), OpLoc, true); + break; + case UnaryOperator::AlignOf: + resultType = CheckSizeOfAlignOfOperand(Input->getType(), OpLoc, false); + break; + case UnaryOperator::Extension: + // FIXME: does __extension__ cause any promotions? I would think not. 
+ resultType = Input->getType(); + break; + } + if (resultType.isNull()) + return true; + return new UnaryOperator(Input, Opc, resultType, OpLoc); +} + +/// ParseAddrLabel - Parse the GNU address of label extension: "&&foo". +Sema::ExprResult Sema::ParseAddrLabel(SourceLocation OpLoc, + SourceLocation LabLoc, + IdentifierInfo *LabelII) { + // Look up the record for this label identifier. + LabelStmt *&LabelDecl = LabelMap[LabelII]; + + // If we haven't seen this label yet, create a forward reference. + if (LabelDecl == 0) + LabelDecl = new LabelStmt(LabLoc, LabelII, 0); + + // Create the AST node. The address of a label always has type 'void*'. + return new AddrLabel(OpLoc, LabLoc, LabelDecl, + Context.getPointerType(Context.VoidTy)); +} + diff --git a/Sema/SemaExprCXX.cpp b/Sema/SemaExprCXX.cpp new file mode 100644 index 0000000000..74be79aadd --- /dev/null +++ b/Sema/SemaExprCXX.cpp @@ -0,0 +1,44 @@ +//===--- SemaExprCXX.cpp - Semantic Analysis for Expressions --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Bill Wendling and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements semantic analysis for C++ expressions. +// +//===----------------------------------------------------------------------===// + +#include "Sema.h" +#include "clang/AST/ExprCXX.h" +using namespace clang; + +/// ParseCXXCasts - Parse {dynamic,static,reinterpret,const}_cast's. 
+Action::ExprResult +Sema::ParseCXXCasts(SourceLocation OpLoc, tok::TokenKind Kind, + SourceLocation LAngleBracketLoc, TypeTy *Ty, + SourceLocation RAngleBracketLoc, + SourceLocation LParenLoc, ExprTy *E, + SourceLocation RParenLoc) { + CXXCastExpr::Opcode Op; + + switch (Kind) { + default: assert(0 && "Unknown C++ cast!"); + case tok::kw_const_cast: Op = CXXCastExpr::ConstCast; break; + case tok::kw_dynamic_cast: Op = CXXCastExpr::DynamicCast; break; + case tok::kw_reinterpret_cast: Op = CXXCastExpr::ReinterpretCast; break; + case tok::kw_static_cast: Op = CXXCastExpr::StaticCast; break; + } + + return new CXXCastExpr(Op, QualType::getFromOpaquePtr(Ty), (Expr*)E, OpLoc); +} + +/// ParseCXXBoolLiteral - Parse {true,false} literals. Kind must be tok::kw_true or tok::kw_false; the literal's value is (Kind == tok::kw_true). +Action::ExprResult +Sema::ParseCXXBoolLiteral(SourceLocation OpLoc, tok::TokenKind Kind) { + assert((Kind == tok::kw_true || Kind == tok::kw_false) && + "Unknown C++ Boolean value!"); + return new CXXBoolLiteralExpr(Kind == tok::kw_true, OpLoc); +} diff --git a/Sema/SemaStmt.cpp b/Sema/SemaStmt.cpp new file mode 100644 index 0000000000..5a5ab45b44 --- /dev/null +++ b/Sema/SemaStmt.cpp @@ -0,0 +1,294 @@ +//===--- SemaStmt.cpp - Semantic Analysis for Statements ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements semantic analysis for statements. 
+// +//===----------------------------------------------------------------------===// + +#include "Sema.h" +#include "clang/AST/Stmt.h" +#include "clang/AST/Expr.h" +#include "clang/Parse/Scope.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/LangOptions.h" +#include "clang/Lex/IdentifierTable.h" +using namespace clang; + +Sema::StmtResult Sema::ParseExprStmt(ExprTy *expr) { + Expr *E = static_cast<Expr*>(expr); + + // Exprs are statements, so there is no need to do a conversion here. However, + // diagnose some potentially bad code. + if (!E->hasLocalSideEffect()) + Diag(E->getExprLoc(), diag::warn_unused_expr, E->getSourceRange()); + + return E; +} + + +Sema::StmtResult Sema::ParseNullStmt(SourceLocation SemiLoc) { + return new NullStmt(SemiLoc); +} + +Sema::StmtResult Sema::ParseDeclStmt(DeclTy *decl) { + if (decl) + return new DeclStmt(static_cast<Decl *>(decl)); + else + return true; // error +} + +Action::StmtResult +Sema::ParseCompoundStmt(SourceLocation L, SourceLocation R, + StmtTy **Elts, unsigned NumElts) { + return new CompoundStmt((Stmt**)Elts, NumElts); +} + +Action::StmtResult +Sema::ParseCaseStmt(SourceLocation CaseLoc, ExprTy *lhsval, + SourceLocation DotDotDotLoc, ExprTy *RHSVal, + SourceLocation ColonLoc, StmtTy *SubStmt) { + Expr *LHSVal = ((Expr *)lhsval); + assert((LHSVal != 0) && "missing expression in case statement"); + + SourceLocation ExpLoc; + // C99 6.8.4.2p3: The expression shall be an integer constant. + if (!LHSVal->isIntegerConstantExpr(&ExpLoc)) + return Diag(ExpLoc, diag::err_case_label_not_integer_constant_expr, + LHSVal->getSourceRange()); + + // FIXME: SEMA for RHS of case range. 
+ + return new CaseStmt(LHSVal, (Expr*)RHSVal, (Stmt*)SubStmt); +} + +Action::StmtResult +Sema::ParseDefaultStmt(SourceLocation DefaultLoc, + SourceLocation ColonLoc, StmtTy *SubStmt) { + return new DefaultStmt((Stmt*)SubStmt); +} + +Action::StmtResult +Sema::ParseLabelStmt(SourceLocation IdentLoc, IdentifierInfo *II, + SourceLocation ColonLoc, StmtTy *SubStmt) { + // Look up the record for this label identifier. + LabelStmt *&LabelDecl = LabelMap[II]; + + // If not forward referenced or defined already, just create a new LabelStmt. + if (LabelDecl == 0) + return LabelDecl = new LabelStmt(IdentLoc, II, (Stmt*)SubStmt); + + assert(LabelDecl->getID() == II && "Label mismatch!"); + + // Otherwise, this label was either forward reference or multiply defined. If + // multiply defined, reject it now. + if (LabelDecl->getSubStmt()) { + Diag(IdentLoc, diag::err_redefinition_of_label, LabelDecl->getName()); + Diag(LabelDecl->getIdentLoc(), diag::err_previous_definition); + return (Stmt*)SubStmt; + } + + // Otherwise, this label was forward declared, and we just found its real + // definition. Fill in the forward definition and return it. 
+ LabelDecl->setIdentLoc(IdentLoc); + LabelDecl->setSubStmt((Stmt*)SubStmt); + return LabelDecl; +} + +Action::StmtResult +Sema::ParseIfStmt(SourceLocation IfLoc, ExprTy *CondVal, + StmtTy *ThenVal, SourceLocation ElseLoc, + StmtTy *ElseVal) { + Expr *condExpr = (Expr *)CondVal; + assert(condExpr && "ParseIfStmt(): missing expression"); + + QualType condType = DefaultFunctionArrayConversion(condExpr->getType()); + assert(!condType.isNull() && "ParseIfStmt(): missing expression type"); + + if (!condType->isScalarType()) // C99 6.8.4.1p1 + return Diag(IfLoc, diag::err_typecheck_statement_requires_scalar, + condType.getAsString(), condExpr->getSourceRange()); + + return new IfStmt(condExpr, (Stmt*)ThenVal, (Stmt*)ElseVal); +} + +Action::StmtResult +Sema::ParseSwitchStmt(SourceLocation SwitchLoc, ExprTy *Cond, StmtTy *Body) { + return new SwitchStmt((Expr*)Cond, (Stmt*)Body); +} + +Action::StmtResult +Sema::ParseWhileStmt(SourceLocation WhileLoc, ExprTy *Cond, StmtTy *Body) { + Expr *condExpr = (Expr *)Cond; + assert(condExpr && "ParseWhileStmt(): missing expression"); + + QualType condType = DefaultFunctionArrayConversion(condExpr->getType()); + assert(!condType.isNull() && "ParseWhileStmt(): missing expression type"); + + if (!condType->isScalarType()) // C99 6.8.5p2 + return Diag(WhileLoc, diag::err_typecheck_statement_requires_scalar, + condType.getAsString(), condExpr->getSourceRange()); + + return new WhileStmt(condExpr, (Stmt*)Body); +} + +Action::StmtResult +Sema::ParseDoStmt(SourceLocation DoLoc, StmtTy *Body, + SourceLocation WhileLoc, ExprTy *Cond) { + Expr *condExpr = (Expr *)Cond; + assert(condExpr && "ParseDoStmt(): missing expression"); + + QualType condType = DefaultFunctionArrayConversion(condExpr->getType()); + assert(!condType.isNull() && "ParseDoStmt(): missing expression type"); + + if (!condType->isScalarType()) // C99 6.8.5p2 + return Diag(DoLoc, diag::err_typecheck_statement_requires_scalar, + condType.getAsString(), condExpr->getSourceRange()); 
+ + return new DoStmt((Stmt*)Body, condExpr); +} + +Action::StmtResult +Sema::ParseForStmt(SourceLocation ForLoc, SourceLocation LParenLoc, + StmtTy *First, ExprTy *Second, ExprTy *Third, + SourceLocation RParenLoc, StmtTy *Body) { + if (First) { + // C99 6.8.5p3: FIXME. Need to hack Parser::ParseForStatement() and + // declaration support to create a DeclStmt node. Once this is done, + // we can test for DeclStmt vs. Expr (already a sub-class of Stmt). + } + if (Second) { + Expr *testExpr = (Expr *)Second; + QualType testType = DefaultFunctionArrayConversion(testExpr->getType()); + assert(!testType.isNull() && "ParseForStmt(): missing test expression type"); + + if (!testType->isScalarType()) // C99 6.8.5p2 + return Diag(ForLoc, diag::err_typecheck_statement_requires_scalar, + testType.getAsString(), testExpr->getSourceRange()); + } + return new ForStmt((Stmt*)First, (Expr*)Second, (Expr*)Third, (Stmt*)Body); +} + + +Action::StmtResult +Sema::ParseGotoStmt(SourceLocation GotoLoc, SourceLocation LabelLoc, + IdentifierInfo *LabelII) { + // Look up the record for this label identifier. + LabelStmt *&LabelDecl = LabelMap[LabelII]; + + // If we haven't seen this label yet, create a forward reference. + if (LabelDecl == 0) + LabelDecl = new LabelStmt(LabelLoc, LabelII, 0); + + return new GotoStmt(LabelDecl); +} + +Action::StmtResult +Sema::ParseIndirectGotoStmt(SourceLocation GotoLoc,SourceLocation StarLoc, + ExprTy *DestExp) { + // FIXME: Verify that the operand is convertible to void*. + + return new IndirectGotoStmt((Expr*)DestExp); +} + +Action::StmtResult +Sema::ParseContinueStmt(SourceLocation ContinueLoc, Scope *CurScope) { + Scope *S = CurScope->getContinueParent(); + if (!S) { + // C99 6.8.6.2p1: A continue shall appear only in or as a loop body. + Diag(ContinueLoc, diag::err_continue_not_in_loop); + return true; + } + + // FIXME: Remember that this continue goes with this loop. 
+ return new ContinueStmt(); +} + +Action::StmtResult +Sema::ParseBreakStmt(SourceLocation BreakLoc, Scope *CurScope) { + Scope *S = CurScope->getBreakParent(); + if (!S) { + // C99 6.8.6.3p1: A break shall appear only in or as a switch/loop body. + Diag(BreakLoc, diag::err_break_not_in_loop_or_switch); + return true; + } + + // FIXME: Remember that this break goes with this loop/switch. + return new BreakStmt(); +} + + +Action::StmtResult +Sema::ParseReturnStmt(SourceLocation ReturnLoc, ExprTy *RetValExp) { + QualType lhsType = CurFunctionDecl->getResultType(); + + if (lhsType->isVoidType()) { + if (RetValExp) // C99 6.8.6.4p1 (ext_ since GCC warns) + Diag(ReturnLoc, diag::ext_return_has_expr, + CurFunctionDecl->getIdentifier()->getName(), + ((Expr *)RetValExp)->getSourceRange()); + return new ReturnStmt((Expr*)RetValExp); + } else { + if (!RetValExp) { + const char *funcName = CurFunctionDecl->getIdentifier()->getName(); + if (getLangOptions().C99) // C99 6.8.6.4p1 (ext_ since GCC warns) + Diag(ReturnLoc, diag::ext_return_missing_expr, funcName); + else // C90 6.6.6.4p4 + Diag(ReturnLoc, diag::warn_return_missing_expr, funcName); + return new ReturnStmt((Expr*)0); + } + } + // we have a non-void function with an expression, continue checking + QualType rhsType = ((Expr *)RetValExp)->getType(); + + if (lhsType == rhsType) // common case, fast path... + return new ReturnStmt((Expr*)RetValExp); + + // C99 6.8.6.4p3(136): The return statement is not an assignment. The + // overlap restriction of subclause 6.5.16.1 does not apply to the case of + // function return. + AssignmentCheckResult result = CheckAssignmentConstraints(lhsType, rhsType); + bool hadError = false; + + // decode the result (notice that extensions still return a type). 
+ switch (result) { + case Compatible: + break; + case Incompatible: + Diag(ReturnLoc, diag::err_typecheck_return_incompatible, + lhsType.getAsString(), rhsType.getAsString(), + ((Expr *)RetValExp)->getSourceRange()); + hadError = true; + break; + case PointerFromInt: + // check for null pointer constant (C99 6.3.2.3p3) + if (!((Expr *)RetValExp)->isNullPointerConstant()) { + Diag(ReturnLoc, diag::ext_typecheck_return_pointer_int, + lhsType.getAsString(), rhsType.getAsString(), + ((Expr *)RetValExp)->getSourceRange()); + } + break; + case IntFromPointer: + Diag(ReturnLoc, diag::ext_typecheck_return_pointer_int, + lhsType.getAsString(), rhsType.getAsString(), + ((Expr *)RetValExp)->getSourceRange()); + break; + case IncompatiblePointer: + Diag(ReturnLoc, diag::ext_typecheck_return_incompatible_pointer, + lhsType.getAsString(), rhsType.getAsString(), + ((Expr *)RetValExp)->getSourceRange()); + break; + case CompatiblePointerDiscardsQualifiers: + Diag(ReturnLoc, diag::ext_typecheck_return_discards_qualifiers, + lhsType.getAsString(), rhsType.getAsString(), + ((Expr *)RetValExp)->getSourceRange()); + break; + } + return new ReturnStmt((Expr*)RetValExp); +} + diff --git a/Sema/SemaType.cpp b/Sema/SemaType.cpp new file mode 100644 index 0000000000..cdb767f58c --- /dev/null +++ b/Sema/SemaType.cpp @@ -0,0 +1,262 @@ +//===--- SemaType.cpp - Semantic Analysis for Types -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements type-related semantic analysis. 
+// +//===----------------------------------------------------------------------===// + +#include "Sema.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Decl.h" +#include "clang/Parse/DeclSpec.h" +#include "clang/Lex/IdentifierTable.h" +using namespace clang; + +/// ConvertDeclSpecToType - Convert the specified declspec to the appropriate +/// type object. This returns null on error. +static QualType ConvertDeclSpecToType(const DeclSpec &DS, ASTContext &Ctx) { + // FIXME: Should move the logic from DeclSpec::Finish to here for validity + // checking. + + switch (DS.getTypeSpecType()) { + default: return QualType(); // FIXME: Handle unimp cases! + case DeclSpec::TST_void: return Ctx.VoidTy; + case DeclSpec::TST_char: + if (DS.getTypeSpecSign() == DeclSpec::TSS_unspecified) + return Ctx.CharTy; + else if (DS.getTypeSpecSign() == DeclSpec::TSS_signed) + return Ctx.SignedCharTy; + else { + assert(DS.getTypeSpecSign() == DeclSpec::TSS_unsigned && + "Unknown TSS value"); + return Ctx.UnsignedCharTy; + } + case DeclSpec::TST_int: + if (DS.getTypeSpecSign() != DeclSpec::TSS_unsigned) { + switch (DS.getTypeSpecWidth()) { + case DeclSpec::TSW_unspecified: return Ctx.IntTy; + case DeclSpec::TSW_short: return Ctx.ShortTy; + case DeclSpec::TSW_long: return Ctx.LongTy; + case DeclSpec::TSW_longlong: return Ctx.LongLongTy; + } + } else { + switch (DS.getTypeSpecWidth()) { + case DeclSpec::TSW_unspecified: return Ctx.UnsignedIntTy; + case DeclSpec::TSW_short: return Ctx.UnsignedShortTy; + case DeclSpec::TSW_long: return Ctx.UnsignedLongTy; + case DeclSpec::TSW_longlong: return Ctx.UnsignedLongLongTy; + } + } + case DeclSpec::TST_float: + if (DS.getTypeSpecComplex() == DeclSpec::TSC_unspecified) + return Ctx.FloatTy; + assert(DS.getTypeSpecComplex() == DeclSpec::TSC_complex && + "FIXME: imaginary types not supported yet!"); + return Ctx.FloatComplexTy; + + case DeclSpec::TST_double: { + bool isLong = DS.getTypeSpecWidth() == DeclSpec::TSW_long; + if 
(DS.getTypeSpecComplex() == DeclSpec::TSC_unspecified) + return isLong ? Ctx.LongDoubleTy : Ctx.DoubleTy; + assert(DS.getTypeSpecComplex() == DeclSpec::TSC_complex && + "FIXME: imaginary types not supported yet!"); + return isLong ? Ctx.LongDoubleComplexTy : Ctx.DoubleComplexTy; + } + case DeclSpec::TST_bool: // _Bool or bool + return Ctx.BoolTy; + case DeclSpec::TST_decimal32: // _Decimal32 + case DeclSpec::TST_decimal64: // _Decimal64 + case DeclSpec::TST_decimal128: // _Decimal128 + assert(0 && "FIXME: GNU decimal extensions not supported yet!"); + case DeclSpec::TST_enum: + case DeclSpec::TST_union: + case DeclSpec::TST_struct: { + Decl *D = static_cast<Decl *>(DS.getTypeRep()); + assert(D && "Didn't get a decl for a enum/union/struct?"); + assert(DS.getTypeSpecWidth() == 0 && DS.getTypeSpecComplex() == 0 && + DS.getTypeSpecSign() == 0 && + "Can't handle qualifiers on typedef names yet!"); + // TypeQuals handled by caller. + return Ctx.getTagDeclType(cast<TagDecl>(D)); + } + case DeclSpec::TST_typedef: { + Decl *D = static_cast<Decl *>(DS.getTypeRep()); + assert(D && "Didn't get a decl for a typedef?"); + assert(DS.getTypeSpecWidth() == 0 && DS.getTypeSpecComplex() == 0 && + DS.getTypeSpecSign() == 0 && + "Can't handle qualifiers on typedef names yet!"); + // TypeQuals handled by caller. + return Ctx.getTypedefType(cast<TypedefDecl>(D)); + } + } +} + +/// GetTypeForDeclarator - Convert the type for the specified declarator to Type +/// instances. +QualType Sema::GetTypeForDeclarator(Declarator &D, Scope *S) { + QualType T = ConvertDeclSpecToType(D.getDeclSpec(), Context); + + // If there was an error parsing declspecs, return a null type pointer. + if (T.isNull()) return T; + + // Apply const/volatile/restrict qualifiers to T. + T = T.getQualifiedType(D.getDeclSpec().getTypeQualifiers()); + + // Walk the DeclTypeInfo, building the recursive type as we go. DeclTypeInfos + // are ordered from the identifier out, which is opposite of what we want :). 
+ for (unsigned i = 0, e = D.getNumTypeObjects(); i != e; ++i) { + const DeclaratorChunk &DeclType = D.getTypeObject(e-i-1); + switch (DeclType.Kind) { + default: assert(0 && "Unknown decltype!"); + case DeclaratorChunk::Pointer: + if (isa<ReferenceType>(T.getCanonicalType().getTypePtr())) { + // C++ 8.3.2p4: There shall be no ... pointers to references ... + Diag(D.getIdentifierLoc(), diag::err_illegal_decl_pointer_to_reference, + D.getIdentifier()->getName()); + return QualType(); + } + + // Apply the pointer typequals to the pointer object. + T = Context.getPointerType(T).getQualifiedType(DeclType.Ptr.TypeQuals); + break; + case DeclaratorChunk::Reference: + if (isa<ReferenceType>(T.getCanonicalType().getTypePtr())) { + // C++ 8.3.2p4: There shall be no references to references ... + Diag(D.getIdentifierLoc(), + diag::err_illegal_decl_reference_to_reference, + D.getIdentifier()->getName()); + return QualType(); + } + + T = Context.getReferenceType(T); + break; + case DeclaratorChunk::Array: { + const DeclaratorChunk::ArrayTypeInfo &ATI = DeclType.Arr; + ArrayType::ArraySizeModifier ASM; + if (ATI.isStar) + ASM = ArrayType::Star; + else if (ATI.hasStatic) + ASM = ArrayType::Static; + else + ASM = ArrayType::Normal; + + Type *CanonicalT = T.getCanonicalType().getTypePtr(); + + // C99 6.7.5.2p1: If the element type is an incomplete or function type, + // reject it (e.g. void ary[7], struct foo ary[7], void ary[7]()) + if (T->isIncompleteType()) { + Diag(D.getIdentifierLoc(), diag::err_illegal_decl_array_incomplete_type, + T.getAsString()); + return QualType(); + } else if (isa<FunctionType>(CanonicalT)) { + Diag(D.getIdentifierLoc(), diag::err_illegal_decl_array_of_functions, + D.getIdentifier()->getName()); + return QualType(); + } else if (isa<ReferenceType>(CanonicalT)) { + // C++ 8.3.2p4: There shall be no ... arrays of references ... 
+ Diag(D.getIdentifierLoc(), diag::err_illegal_decl_array_of_references, + D.getIdentifier()->getName()); + return QualType(); + } else if (RecordType *EltTy = dyn_cast<RecordType>(CanonicalT)) { + // If the element type is a struct or union that contains a flexible + // array member, reject it: C99 6.7.2.1p2. + if (EltTy->getDecl()->hasFlexibleArrayMember()) { + Diag(DeclType.Loc, diag::err_flexible_array_in_array, + T.getAsString()); + return QualType(); + } + } + T = Context.getArrayType(T, ASM, ATI.TypeQuals, + static_cast<Expr *>(ATI.NumElts)); + break; + } + case DeclaratorChunk::Function: + // If the function declarator has a prototype (i.e. it is not () and + // does not have a K&R-style identifier list), then the arguments are part + // of the type, otherwise the argument list is (). + const DeclaratorChunk::FunctionTypeInfo &FTI = DeclType.Fun; + if (!FTI.hasPrototype) { + // Simple void foo(), where the incoming T is the result type. + T = Context.getFunctionTypeNoProto(T); + + // C99 6.7.5.3p3: Reject int(x,y,z) when it's not a function definition. + if (FTI.NumArgs != 0) + Diag(FTI.ArgInfo[0].IdentLoc, diag::err_ident_list_in_fn_declaration); + + } else { + // Otherwise, we have a function with an argument list that is + // potentially variadic. + llvm::SmallVector<QualType, 16> ArgTys; + + for (unsigned i = 0, e = FTI.NumArgs; i != e; ++i) { + QualType ArgTy = QualType::getFromOpaquePtr(FTI.ArgInfo[i].TypeInfo); + if (ArgTy.isNull()) + return QualType(); // Error occurred parsing argument type. + + // Look for 'void'. void is allowed only as a single argument to a + // function with no other parameters (C99 6.7.5.3p10). We record + // int(void) as a FunctionTypeProto with an empty argument list. + if (ArgTy->isVoidType()) { + // If this is something like 'float(int, void)', reject it. 'void' + // is an incomplete type (C99 6.2.5p19) and function decls cannot + // have arguments of incomplete type. 
+ if (FTI.NumArgs != 1 || FTI.isVariadic) { + Diag(DeclType.Loc, diag::err_void_only_param); + return QualType(); + } + // Reject, but continue to parse 'int(void abc)'. + if (FTI.ArgInfo[i].Ident) + Diag(FTI.ArgInfo[i].IdentLoc, + diag::err_void_param_with_identifier); + + // Reject, but continue to parse 'float(const void)'. + if (ArgTy.getQualifiers()) + Diag(DeclType.Loc, diag::err_void_param_qualified); + + // Do not add 'void' to the ArgTys list. + break; + } + + ArgTys.push_back(ArgTy); + } + T = Context.getFunctionType(T, &ArgTys[0], ArgTys.size(), + FTI.isVariadic); + } + break; + } + } + + return T; +} + +Sema::TypeResult Sema::ParseTypeName(Scope *S, Declarator &D) { + // C99 6.7.6: Type names have no identifier. This is already validated by + // the parser. + assert(D.getIdentifier() == 0 && "Type name should have no identifier!"); + + QualType T = GetTypeForDeclarator(D, S); + + // If the type of the declarator was invalid, this is an invalid typename. + if (T.isNull()) + return true; + + return T.getAsOpaquePtr(); +} + +Sema::TypeResult Sema::ParseParamDeclaratorType(Scope *S, Declarator &D) { + // Note: parameters have identifiers, but we don't care about them here, we + // just want the type converted. + QualType T = GetTypeForDeclarator(D, S); + + // If the type of the declarator was invalid, this is an invalid typename. + if (T.isNull()) + return true; + + return T.getAsOpaquePtr(); +} diff --git a/TODO.txt b/TODO.txt new file mode 100644 index 0000000000..d2b944bb6b --- /dev/null +++ b/TODO.txt @@ -0,0 +1,27 @@ +//===---------------------------------------------------------------------===// +// Minor random things that can be improved +//===---------------------------------------------------------------------===// + + + +//===---------------------------------------------------------------------===// + +Lexer-related diagnostics should point to the problematic character, not the +start of the token. 
For example: + +int y = 0000\ +00080; + +diag.c:4:9: error: invalid digit '8' in octal constant +int y = 0000\ + ^ + +should be: + +diag.c:4:9: error: invalid digit '8' in octal constant +00080; + ^ + +//===---------------------------------------------------------------------===// + + diff --git a/clang.xcodeproj/project.pbxproj b/clang.xcodeproj/project.pbxproj new file mode 100644 index 0000000000..560076196a --- /dev/null +++ b/clang.xcodeproj/project.pbxproj @@ -0,0 +1,781 @@ +// !$*UTF8*$! +{ + archiveVersion = 1; + classes = { + }; + objectVersion = 42; + objects = { + +/* Begin PBXBuildFile section */ + 1A30A9E90B93A4C800201A91 /* ExprCXX.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 1A30A9E80B93A4C800201A91 /* ExprCXX.h */; }; + 1A869A700BA2164C008DA07A /* LiteralSupport.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 1A869A6E0BA2164C008DA07A /* LiteralSupport.h */; }; + 1A869AA80BA21ABA008DA07A /* LiteralSupport.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1A869AA70BA21ABA008DA07A /* LiteralSupport.cpp */; }; + 84D9A8880C1A57E100AC7ABC /* AttributeList.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 84D9A8870C1A57E100AC7ABC /* AttributeList.cpp */; }; + 84D9A88C0C1A581300AC7ABC /* AttributeList.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 84D9A88B0C1A581300AC7ABC /* AttributeList.h */; }; + DE01DA490B12ADA300AC22CE /* PPCallbacks.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DE01DA480B12ADA300AC22CE /* PPCallbacks.h */; }; + DE06756C0C051CFE00EBBFD8 /* ParseExprCXX.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE06756B0C051CFE00EBBFD8 /* ParseExprCXX.cpp */; }; + DE06B73E0A8307640050E87E /* LangOptions.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DE06B73D0A8307640050E87E /* LangOptions.h */; }; + DE06BECB0A854E4B0050E87E /* Scope.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DE06BECA0A854E4B0050E87E /* Scope.h */; }; + DE06D4310A8BB52D0050E87E /* Parser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 
DE06D42F0A8BB52D0050E87E /* Parser.cpp */; }; + DE06E8140A8FF9330050E87E /* Action.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DE06E8130A8FF9330050E87E /* Action.h */; }; + DE0FCA630A95859D00248FD5 /* Expr.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DE0FCA620A95859D00248FD5 /* Expr.h */; }; + DE0FCB340A9C21F100248FD5 /* Expr.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE0FCB330A9C21F100248FD5 /* Expr.cpp */; }; + DE1733000B068B700080B521 /* ASTContext.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE1732FF0B068B700080B521 /* ASTContext.cpp */; }; + DE17336E0B068DC20080B521 /* DeclSpec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE17336D0B068DC20080B521 /* DeclSpec.cpp */; }; + DE1733700B068DC60080B521 /* DeclSpec.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DE17336F0B068DC60080B521 /* DeclSpec.h */; }; + DE1F22030A7D852A00FBF588 /* Parser.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DE1F22020A7D852A00FBF588 /* Parser.h */; }; + DE344AB80AE5DF6D00DBC861 /* HeaderSearch.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DE344AB70AE5DF6D00DBC861 /* HeaderSearch.h */; }; + DE344B540AE5E46C00DBC861 /* HeaderSearch.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE344B530AE5E46C00DBC861 /* HeaderSearch.cpp */; }; + DE3450D70AEB543100DBC861 /* DirectoryLookup.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DE3450D60AEB543100DBC861 /* DirectoryLookup.h */; }; + DE3451580AEC176100DBC861 /* MacroExpander.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE3451570AEC176100DBC861 /* MacroExpander.cpp */; }; + DE3452410AEF1A2D00DBC861 /* Stmt.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE3452400AEF1A2D00DBC861 /* Stmt.cpp */; }; + DE3452810AEF1B1800DBC861 /* Stmt.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DE3452800AEF1B1800DBC861 /* Stmt.h */; }; + DE345C1A0AFC658B00DBC861 /* StmtVisitor.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DE345C190AFC658B00DBC861 /* StmtVisitor.h */; }; + DE345C570AFC69E800DBC861 
/* StmtVisitor.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE345C560AFC69E800DBC861 /* StmtVisitor.cpp */; }; + DE345F220AFD347900DBC861 /* StmtNodes.def in CopyFiles */ = {isa = PBXBuildFile; fileRef = DE345F210AFD347900DBC861 /* StmtNodes.def */; }; + DE3460000AFDCC1900DBC861 /* ParseObjc.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE345FFF0AFDCC1900DBC861 /* ParseObjc.cpp */; }; + DE3460050AFDCC6500DBC861 /* ParseInit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE3460040AFDCC6500DBC861 /* ParseInit.cpp */; }; + DE34600B0AFDCCBF00DBC861 /* ParseStmt.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE34600A0AFDCCBF00DBC861 /* ParseStmt.cpp */; }; + DE34600F0AFDCCCE00DBC861 /* ParseDecl.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE34600E0AFDCCCE00DBC861 /* ParseDecl.cpp */; }; + DE3460130AFDCCDA00DBC861 /* ParseExpr.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE3460120AFDCCDA00DBC861 /* ParseExpr.cpp */; }; + DE3461270AFE68BE00DBC861 /* MinimalAction.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE3461260AFE68BE00DBC861 /* MinimalAction.cpp */; }; + DE34621D0AFEB19B00DBC861 /* StmtPrinter.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE34621C0AFEB19B00DBC861 /* StmtPrinter.cpp */; }; + DE3464220B03040900DBC861 /* Type.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DE3464210B03040900DBC861 /* Type.h */; }; + DE4264FC0C113592005A861D /* CGDecl.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE4264FB0C113592005A861D /* CGDecl.cpp */; }; + DE46BF280AE0A82D00CC047C /* TargetInfo.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DE46BF270AE0A82D00CC047C /* TargetInfo.h */; }; + DE4772FA0C10EAE5002239E8 /* CGStmt.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE4772F90C10EAE5002239E8 /* CGStmt.cpp */; }; + DE4772FC0C10EAEC002239E8 /* CGExpr.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE4772FB0C10EAEC002239E8 /* CGExpr.cpp */; }; + DE5932D10AD60FF400BC794C /* clang.cpp in Sources */ = {isa = PBXBuildFile; 
fileRef = DE5932CD0AD60FF400BC794C /* clang.cpp */; }; + DE5932D20AD60FF400BC794C /* clang.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DE5932CE0AD60FF400BC794C /* clang.h */; }; + DE5932D30AD60FF400BC794C /* PrintParserCallbacks.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE5932CF0AD60FF400BC794C /* PrintParserCallbacks.cpp */; }; + DE5932D40AD60FF400BC794C /* PrintPreprocessedOutput.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE5932D00AD60FF400BC794C /* PrintPreprocessedOutput.cpp */; }; + DE67E70B0C020EC500F66BC5 /* SemaType.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE67E70A0C020EC500F66BC5 /* SemaType.cpp */; }; + DE67E70D0C020ECA00F66BC5 /* SemaStmt.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE67E70C0C020ECA00F66BC5 /* SemaStmt.cpp */; }; + DE67E70F0C020ECF00F66BC5 /* SemaExprCXX.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE67E70E0C020ECF00F66BC5 /* SemaExprCXX.cpp */; }; + DE67E7110C020ED400F66BC5 /* SemaExpr.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE67E7100C020ED400F66BC5 /* SemaExpr.cpp */; }; + DE67E7130C020ED900F66BC5 /* SemaDecl.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE67E7120C020ED900F66BC5 /* SemaDecl.cpp */; }; + DE67E7150C020EDF00F66BC5 /* Sema.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DE67E7140C020EDF00F66BC5 /* Sema.h */; }; + DE67E7170C020EE400F66BC5 /* Sema.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE67E7160C020EE400F66BC5 /* Sema.cpp */; }; + DE67E71A0C020F4F00F66BC5 /* ASTStreamer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE67E7190C020F4F00F66BC5 /* ASTStreamer.cpp */; }; + DE67E7280C02109800F66BC5 /* ASTStreamer.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DE67E7270C02109800F66BC5 /* ASTStreamer.h */; }; + DE75ED290B044DC90020CF81 /* ASTContext.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DE75ED280B044DC90020CF81 /* ASTContext.h */; }; + DE75EDF10B06880E0020CF81 /* Type.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 
DE75EDF00B06880E0020CF81 /* Type.cpp */; }; + DE927FFD0C055DE900231DA4 /* LLVMCodegen.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE927FFC0C055DE900231DA4 /* LLVMCodegen.cpp */; }; + DE928B130C05659200231DA4 /* ModuleBuilder.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE928B120C05659200231DA4 /* ModuleBuilder.cpp */; }; + DE928B200C0565B000231DA4 /* ModuleBuilder.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DE928B1F0C0565B000231DA4 /* ModuleBuilder.h */; }; + DE928B7D0C0A615100231DA4 /* CodeGenModule.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DE928B7C0C0A615100231DA4 /* CodeGenModule.h */; }; + DE928B7F0C0A615600231DA4 /* CodeGenModule.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE928B7E0C0A615600231DA4 /* CodeGenModule.cpp */; }; + DE928B810C0A615B00231DA4 /* CodeGenFunction.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DE928B800C0A615B00231DA4 /* CodeGenFunction.h */; }; + DE928B830C0A616000231DA4 /* CodeGenFunction.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DE928B820C0A616000231DA4 /* CodeGenFunction.cpp */; }; + DEAEE98B0A5A2B970045101B /* MultipleIncludeOpt.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DEAEE98A0A5A2B970045101B /* MultipleIncludeOpt.h */; }; + DEAEED4B0A5AF89A0045101B /* NOTES.txt in CopyFiles */ = {isa = PBXBuildFile; fileRef = DEAEED4A0A5AF89A0045101B /* NOTES.txt */; }; + DEB0AEB90C2087A700718A22 /* TextDiagnostics.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DEB0AEB80C2087A700718A22 /* TextDiagnostics.h */; }; + DEB0AEBB0C2087AB00718A22 /* TextDiagnostics.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DEB0AEBA0C2087AB00718A22 /* TextDiagnostics.cpp */; }; + DEC82DC40C32D50A00BAC245 /* DiagChecker.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DEC82DC30C32D50A00BAC245 /* DiagChecker.cpp */; }; + DEC8D9910A9433CD00353FCA /* Decl.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DEC8D9900A9433CD00353FCA /* Decl.h */; }; + DEC8D9A40A94346E00353FCA /* AST.h in CopyFiles */ = 
{isa = PBXBuildFile; fileRef = DEC8D9A30A94346E00353FCA /* AST.h */; }; + DED626C90AE0C065001E80A4 /* TargetInfo.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DED626C80AE0C065001E80A4 /* TargetInfo.cpp */; }; + DED627030AE0C51D001E80A4 /* Targets.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DED627020AE0C51D001E80A4 /* Targets.cpp */; }; + DED62ABB0AE2EDF1001E80A4 /* Decl.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DED62ABA0AE2EDF1001E80A4 /* Decl.cpp */; }; + DED676D10B6C786700AAD4A3 /* Builtins.def in CopyFiles */ = {isa = PBXBuildFile; fileRef = DED676D00B6C786700AAD4A3 /* Builtins.def */; }; + DED676FA0B6C797B00AAD4A3 /* Builtins.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DED676F90B6C797B00AAD4A3 /* Builtins.h */; }; + DED677C90B6C854100AAD4A3 /* Builtins.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DED677C80B6C854100AAD4A3 /* Builtins.cpp */; }; + DED67AEE0B6DB92A00AAD4A3 /* X86Builtins.def in CopyFiles */ = {isa = PBXBuildFile; fileRef = DED67AED0B6DB92A00AAD4A3 /* X86Builtins.def */; }; + DED67AF00B6DB92F00AAD4A3 /* PPCBuiltins.def in CopyFiles */ = {isa = PBXBuildFile; fileRef = DED67AEF0B6DB92F00AAD4A3 /* PPCBuiltins.def */; }; + DED7D7410A524295003AD0FB /* Diagnostic.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DED7D7310A524295003AD0FB /* Diagnostic.h */; }; + DED7D7420A524295003AD0FB /* DiagnosticKinds.def in CopyFiles */ = {isa = PBXBuildFile; fileRef = DED7D7320A524295003AD0FB /* DiagnosticKinds.def */; }; + DED7D7430A524295003AD0FB /* FileManager.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DED7D7330A524295003AD0FB /* FileManager.h */; }; + DED7D7450A524295003AD0FB /* SourceLocation.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DED7D7350A524295003AD0FB /* SourceLocation.h */; }; + DED7D7460A524295003AD0FB /* SourceManager.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DED7D7360A524295003AD0FB /* SourceManager.h */; }; + DED7D7470A524295003AD0FB /* TokenKinds.def in CopyFiles */ = {isa = PBXBuildFile; 
fileRef = DED7D7370A524295003AD0FB /* TokenKinds.def */; }; + DED7D7480A524295003AD0FB /* TokenKinds.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DED7D7380A524295003AD0FB /* TokenKinds.h */; }; + DED7D7490A524295003AD0FB /* IdentifierTable.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DED7D73A0A524295003AD0FB /* IdentifierTable.h */; }; + DED7D74A0A524295003AD0FB /* Lexer.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DED7D73B0A524295003AD0FB /* Lexer.h */; }; + DED7D74B0A524295003AD0FB /* LexerToken.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DED7D73C0A524295003AD0FB /* LexerToken.h */; }; + DED7D74C0A524295003AD0FB /* MacroExpander.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DED7D73D0A524295003AD0FB /* MacroExpander.h */; }; + DED7D74D0A524295003AD0FB /* MacroInfo.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DED7D73E0A524295003AD0FB /* MacroInfo.h */; }; + DED7D74E0A524295003AD0FB /* Pragma.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DED7D73F0A524295003AD0FB /* Pragma.h */; }; + DED7D74F0A524295003AD0FB /* Preprocessor.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DED7D7400A524295003AD0FB /* Preprocessor.h */; }; + DED7D77A0A5242C7003AD0FB /* Diagnostic.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DED7D75D0A5242C7003AD0FB /* Diagnostic.cpp */; }; + DED7D77B0A5242C7003AD0FB /* FileManager.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DED7D75E0A5242C7003AD0FB /* FileManager.cpp */; }; + DED7D7890A5242C7003AD0FB /* SourceManager.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DED7D76D0A5242C7003AD0FB /* SourceManager.cpp */; }; + DED7D78A0A5242C7003AD0FB /* TokenKinds.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DED7D76E0A5242C7003AD0FB /* TokenKinds.cpp */; }; + DED7D7C20A5242E6003AD0FB /* IdentifierTable.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DED7D79D0A5242E6003AD0FB /* IdentifierTable.cpp */; }; + DED7D7C30A5242E6003AD0FB /* Lexer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 
DED7D79E0A5242E6003AD0FB /* Lexer.cpp */; }; + DED7D7C50A5242E6003AD0FB /* MacroInfo.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DED7D7A00A5242E6003AD0FB /* MacroInfo.cpp */; }; + DED7D7C70A5242E6003AD0FB /* PPExpressions.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DED7D7A20A5242E6003AD0FB /* PPExpressions.cpp */; }; + DED7D7C80A5242E6003AD0FB /* Pragma.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DED7D7A30A5242E6003AD0FB /* Pragma.cpp */; }; + DED7D7C90A5242E6003AD0FB /* Preprocessor.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DED7D7A40A5242E6003AD0FB /* Preprocessor.cpp */; }; + DED7D7D80A524302003AD0FB /* README.txt in CopyFiles */ = {isa = PBXBuildFile; fileRef = DED7D7D70A524302003AD0FB /* README.txt */; }; + DED7D9180A52518C003AD0FB /* ScratchBuffer.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DED7D9170A52518C003AD0FB /* ScratchBuffer.h */; }; + DED7D9E50A5257F6003AD0FB /* ScratchBuffer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DED7D9E40A5257F6003AD0FB /* ScratchBuffer.cpp */; }; + DEEBBD440C19C5D200A9FE82 /* TODO.txt in CopyFiles */ = {isa = PBXBuildFile; fileRef = DEEBBD430C19C5D200A9FE82 /* TODO.txt */; }; + DEEBC3BA0C2363B800A9FE82 /* CodeGenTypes.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DEEBC3B90C2363B800A9FE82 /* CodeGenTypes.h */; }; + DEEBC3BC0C2363BC00A9FE82 /* CodeGenTypes.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DEEBC3BB0C2363BC00A9FE82 /* CodeGenTypes.cpp */; }; + DEEBCBE30C33702C00A9FE82 /* TextDiagnosticBuffer.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = DEEBCBE20C33702C00A9FE82 /* TextDiagnosticBuffer.h */; }; + DEEBCBE50C33703100A9FE82 /* TextDiagnosticBuffer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DEEBCBE40C33703100A9FE82 /* TextDiagnosticBuffer.cpp */; }; + F0226FD20C18084500141F42 /* TextDiagnosticPrinter.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F0226FD00C18084500141F42 /* TextDiagnosticPrinter.cpp */; }; + F0226FD30C18084500141F42 /* 
TextDiagnosticPrinter.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = F0226FD10C18084500141F42 /* TextDiagnosticPrinter.h */; }; +/* End PBXBuildFile section */ + +/* Begin PBXCopyFilesBuildPhase section */ + 8DD76F690486A84900D96B5E /* CopyFiles */ = { + isa = PBXCopyFilesBuildPhase; + buildActionMask = 8; + dstPath = /usr/share/man/man1/; + dstSubfolderSpec = 0; + files = ( + DED7D7410A524295003AD0FB /* Diagnostic.h in CopyFiles */, + DED7D7420A524295003AD0FB /* DiagnosticKinds.def in CopyFiles */, + DED7D7430A524295003AD0FB /* FileManager.h in CopyFiles */, + DED7D7450A524295003AD0FB /* SourceLocation.h in CopyFiles */, + DED7D7460A524295003AD0FB /* SourceManager.h in CopyFiles */, + DED7D7470A524295003AD0FB /* TokenKinds.def in CopyFiles */, + DED7D7480A524295003AD0FB /* TokenKinds.h in CopyFiles */, + DED7D7490A524295003AD0FB /* IdentifierTable.h in CopyFiles */, + DED7D74A0A524295003AD0FB /* Lexer.h in CopyFiles */, + DED7D74B0A524295003AD0FB /* LexerToken.h in CopyFiles */, + DED7D74C0A524295003AD0FB /* MacroExpander.h in CopyFiles */, + DED7D74D0A524295003AD0FB /* MacroInfo.h in CopyFiles */, + DED7D74E0A524295003AD0FB /* Pragma.h in CopyFiles */, + DED7D74F0A524295003AD0FB /* Preprocessor.h in CopyFiles */, + DED7D7D80A524302003AD0FB /* README.txt in CopyFiles */, + DED7D9180A52518C003AD0FB /* ScratchBuffer.h in CopyFiles */, + DEAEE98B0A5A2B970045101B /* MultipleIncludeOpt.h in CopyFiles */, + DEAEED4B0A5AF89A0045101B /* NOTES.txt in CopyFiles */, + DE1F22030A7D852A00FBF588 /* Parser.h in CopyFiles */, + DE06B73E0A8307640050E87E /* LangOptions.h in CopyFiles */, + DE06BECB0A854E4B0050E87E /* Scope.h in CopyFiles */, + DE06E8140A8FF9330050E87E /* Action.h in CopyFiles */, + DEC8D9910A9433CD00353FCA /* Decl.h in CopyFiles */, + DEC8D9A40A94346E00353FCA /* AST.h in CopyFiles */, + DE0FCA630A95859D00248FD5 /* Expr.h in CopyFiles */, + DE5932D20AD60FF400BC794C /* clang.h in CopyFiles */, + DE46BF280AE0A82D00CC047C /* TargetInfo.h in CopyFiles */, + 
DE344AB80AE5DF6D00DBC861 /* HeaderSearch.h in CopyFiles */, + DE3450D70AEB543100DBC861 /* DirectoryLookup.h in CopyFiles */, + DE3452810AEF1B1800DBC861 /* Stmt.h in CopyFiles */, + DE345C1A0AFC658B00DBC861 /* StmtVisitor.h in CopyFiles */, + DE345F220AFD347900DBC861 /* StmtNodes.def in CopyFiles */, + DE3464220B03040900DBC861 /* Type.h in CopyFiles */, + DE75ED290B044DC90020CF81 /* ASTContext.h in CopyFiles */, + DE1733700B068DC60080B521 /* DeclSpec.h in CopyFiles */, + DE01DA490B12ADA300AC22CE /* PPCallbacks.h in CopyFiles */, + DED676D10B6C786700AAD4A3 /* Builtins.def in CopyFiles */, + DED676FA0B6C797B00AAD4A3 /* Builtins.h in CopyFiles */, + DED67AEE0B6DB92A00AAD4A3 /* X86Builtins.def in CopyFiles */, + DED67AF00B6DB92F00AAD4A3 /* PPCBuiltins.def in CopyFiles */, + 1A30A9E90B93A4C800201A91 /* ExprCXX.h in CopyFiles */, + 1A869A700BA2164C008DA07A /* LiteralSupport.h in CopyFiles */, + DE67E7150C020EDF00F66BC5 /* Sema.h in CopyFiles */, + DE67E7280C02109800F66BC5 /* ASTStreamer.h in CopyFiles */, + DE928B200C0565B000231DA4 /* ModuleBuilder.h in CopyFiles */, + DE928B7D0C0A615100231DA4 /* CodeGenModule.h in CopyFiles */, + DE928B810C0A615B00231DA4 /* CodeGenFunction.h in CopyFiles */, + F0226FD30C18084500141F42 /* TextDiagnosticPrinter.h in CopyFiles */, + DEEBBD440C19C5D200A9FE82 /* TODO.txt in CopyFiles */, + 84D9A88C0C1A581300AC7ABC /* AttributeList.h in CopyFiles */, + DEB0AEB90C2087A700718A22 /* TextDiagnostics.h in CopyFiles */, + DEEBC3BA0C2363B800A9FE82 /* CodeGenTypes.h in CopyFiles */, + DEEBCBE30C33702C00A9FE82 /* TextDiagnosticBuffer.h in CopyFiles */, + ); + runOnlyForDeploymentPostprocessing = 1; + }; +/* End PBXCopyFilesBuildPhase section */ + +/* Begin PBXFileReference section */ + 1A30A9E80B93A4C800201A91 /* ExprCXX.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = ExprCXX.h; path = clang/AST/ExprCXX.h; sourceTree = "<group>"; }; + 1A869A6E0BA2164C008DA07A /* LiteralSupport.h */ = {isa = 
PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = LiteralSupport.h; sourceTree = "<group>"; }; + 1A869AA70BA21ABA008DA07A /* LiteralSupport.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = LiteralSupport.cpp; sourceTree = "<group>"; }; + 84D9A8870C1A57E100AC7ABC /* AttributeList.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = AttributeList.cpp; path = Parse/AttributeList.cpp; sourceTree = "<group>"; }; + 84D9A88B0C1A581300AC7ABC /* AttributeList.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = AttributeList.h; path = clang/Parse/AttributeList.h; sourceTree = "<group>"; }; + 8DD76F6C0486A84900D96B5E /* clang */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = clang; sourceTree = BUILT_PRODUCTS_DIR; }; + DE01DA480B12ADA300AC22CE /* PPCallbacks.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = PPCallbacks.h; sourceTree = "<group>"; }; + DE06756B0C051CFE00EBBFD8 /* ParseExprCXX.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = ParseExprCXX.cpp; path = Parse/ParseExprCXX.cpp; sourceTree = "<group>"; }; + DE06B73D0A8307640050E87E /* LangOptions.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = LangOptions.h; sourceTree = "<group>"; }; + DE06BECA0A854E4B0050E87E /* Scope.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = Scope.h; path = clang/Parse/Scope.h; sourceTree = "<group>"; }; + DE06D42F0A8BB52D0050E87E /* Parser.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = Parser.cpp; path = Parse/Parser.cpp; sourceTree = "<group>"; }; + DE06E8130A8FF9330050E87E /* Action.h */ = {isa = PBXFileReference; fileEncoding = 30; 
lastKnownFileType = sourcecode.c.h; name = Action.h; path = clang/Parse/Action.h; sourceTree = "<group>"; }; + DE0FCA620A95859D00248FD5 /* Expr.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = Expr.h; path = clang/AST/Expr.h; sourceTree = "<group>"; }; + DE0FCB330A9C21F100248FD5 /* Expr.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = Expr.cpp; path = AST/Expr.cpp; sourceTree = "<group>"; }; + DE1732FF0B068B700080B521 /* ASTContext.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = ASTContext.cpp; path = AST/ASTContext.cpp; sourceTree = "<group>"; }; + DE17336D0B068DC20080B521 /* DeclSpec.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = DeclSpec.cpp; path = Parse/DeclSpec.cpp; sourceTree = "<group>"; }; + DE17336F0B068DC60080B521 /* DeclSpec.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = DeclSpec.h; path = clang/Parse/DeclSpec.h; sourceTree = "<group>"; }; + DE1F22020A7D852A00FBF588 /* Parser.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = Parser.h; path = clang/Parse/Parser.h; sourceTree = "<group>"; }; + DE344AB70AE5DF6D00DBC861 /* HeaderSearch.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = HeaderSearch.h; sourceTree = "<group>"; }; + DE344B530AE5E46C00DBC861 /* HeaderSearch.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = HeaderSearch.cpp; sourceTree = "<group>"; }; + DE3450D60AEB543100DBC861 /* DirectoryLookup.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = DirectoryLookup.h; sourceTree = "<group>"; }; + DE3451570AEC176100DBC861 /* MacroExpander.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; 
path = MacroExpander.cpp; sourceTree = "<group>"; }; + DE3452400AEF1A2D00DBC861 /* Stmt.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = Stmt.cpp; path = AST/Stmt.cpp; sourceTree = "<group>"; }; + DE3452800AEF1B1800DBC861 /* Stmt.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = Stmt.h; path = clang/AST/Stmt.h; sourceTree = "<group>"; }; + DE345C190AFC658B00DBC861 /* StmtVisitor.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = StmtVisitor.h; path = clang/AST/StmtVisitor.h; sourceTree = "<group>"; }; + DE345C560AFC69E800DBC861 /* StmtVisitor.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = StmtVisitor.cpp; path = AST/StmtVisitor.cpp; sourceTree = "<group>"; }; + DE345F210AFD347900DBC861 /* StmtNodes.def */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = text; name = StmtNodes.def; path = clang/AST/StmtNodes.def; sourceTree = "<group>"; }; + DE345FFF0AFDCC1900DBC861 /* ParseObjc.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = ParseObjc.cpp; path = Parse/ParseObjc.cpp; sourceTree = "<group>"; }; + DE3460040AFDCC6500DBC861 /* ParseInit.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = ParseInit.cpp; path = Parse/ParseInit.cpp; sourceTree = "<group>"; }; + DE34600A0AFDCCBF00DBC861 /* ParseStmt.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = ParseStmt.cpp; path = Parse/ParseStmt.cpp; sourceTree = "<group>"; }; + DE34600E0AFDCCCE00DBC861 /* ParseDecl.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = ParseDecl.cpp; path = Parse/ParseDecl.cpp; sourceTree = "<group>"; }; + DE3460120AFDCCDA00DBC861 /* ParseExpr.cpp */ = {isa = PBXFileReference; fileEncoding = 30; 
lastKnownFileType = sourcecode.cpp.cpp; name = ParseExpr.cpp; path = Parse/ParseExpr.cpp; sourceTree = "<group>"; }; + DE3461260AFE68BE00DBC861 /* MinimalAction.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = MinimalAction.cpp; path = Parse/MinimalAction.cpp; sourceTree = "<group>"; }; + DE34621C0AFEB19B00DBC861 /* StmtPrinter.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = StmtPrinter.cpp; path = AST/StmtPrinter.cpp; sourceTree = "<group>"; }; + DE3464210B03040900DBC861 /* Type.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = Type.h; path = clang/AST/Type.h; sourceTree = "<group>"; }; + DE4264FB0C113592005A861D /* CGDecl.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = CGDecl.cpp; path = CodeGen/CGDecl.cpp; sourceTree = "<group>"; }; + DE46BF270AE0A82D00CC047C /* TargetInfo.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = TargetInfo.h; sourceTree = "<group>"; }; + DE4772F90C10EAE5002239E8 /* CGStmt.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = CGStmt.cpp; path = CodeGen/CGStmt.cpp; sourceTree = "<group>"; }; + DE4772FB0C10EAEC002239E8 /* CGExpr.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = CGExpr.cpp; path = CodeGen/CGExpr.cpp; sourceTree = "<group>"; }; + DE5932CD0AD60FF400BC794C /* clang.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = clang.cpp; path = Driver/clang.cpp; sourceTree = "<group>"; }; + DE5932CE0AD60FF400BC794C /* clang.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = clang.h; path = Driver/clang.h; sourceTree = "<group>"; }; + DE5932CF0AD60FF400BC794C /* PrintParserCallbacks.cpp */ = {isa = PBXFileReference; 
fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = PrintParserCallbacks.cpp; path = Driver/PrintParserCallbacks.cpp; sourceTree = "<group>"; }; + DE5932D00AD60FF400BC794C /* PrintPreprocessedOutput.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = PrintPreprocessedOutput.cpp; path = Driver/PrintPreprocessedOutput.cpp; sourceTree = "<group>"; }; + DE67E70A0C020EC500F66BC5 /* SemaType.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = SemaType.cpp; path = Sema/SemaType.cpp; sourceTree = "<group>"; }; + DE67E70C0C020ECA00F66BC5 /* SemaStmt.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = SemaStmt.cpp; path = Sema/SemaStmt.cpp; sourceTree = "<group>"; }; + DE67E70E0C020ECF00F66BC5 /* SemaExprCXX.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = SemaExprCXX.cpp; path = Sema/SemaExprCXX.cpp; sourceTree = "<group>"; }; + DE67E7100C020ED400F66BC5 /* SemaExpr.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = SemaExpr.cpp; path = Sema/SemaExpr.cpp; sourceTree = "<group>"; }; + DE67E7120C020ED900F66BC5 /* SemaDecl.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = SemaDecl.cpp; path = Sema/SemaDecl.cpp; sourceTree = "<group>"; }; + DE67E7140C020EDF00F66BC5 /* Sema.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = Sema.h; path = Sema/Sema.h; sourceTree = "<group>"; }; + DE67E7160C020EE400F66BC5 /* Sema.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = Sema.cpp; path = Sema/Sema.cpp; sourceTree = "<group>"; }; + DE67E7190C020F4F00F66BC5 /* ASTStreamer.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = ASTStreamer.cpp; path = 
Sema/ASTStreamer.cpp; sourceTree = "<group>"; }; + DE67E7270C02109800F66BC5 /* ASTStreamer.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = ASTStreamer.h; path = clang/Sema/ASTStreamer.h; sourceTree = "<group>"; }; + DE75ED280B044DC90020CF81 /* ASTContext.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = ASTContext.h; path = clang/AST/ASTContext.h; sourceTree = "<group>"; }; + DE75EDF00B06880E0020CF81 /* Type.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = Type.cpp; path = AST/Type.cpp; sourceTree = "<group>"; }; + DE927FFC0C055DE900231DA4 /* LLVMCodegen.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = LLVMCodegen.cpp; path = Driver/LLVMCodegen.cpp; sourceTree = "<group>"; }; + DE928B120C05659200231DA4 /* ModuleBuilder.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = ModuleBuilder.cpp; path = CodeGen/ModuleBuilder.cpp; sourceTree = "<group>"; }; + DE928B1F0C0565B000231DA4 /* ModuleBuilder.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = ModuleBuilder.h; path = clang/CodeGen/ModuleBuilder.h; sourceTree = "<group>"; }; + DE928B7C0C0A615100231DA4 /* CodeGenModule.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = CodeGenModule.h; path = CodeGen/CodeGenModule.h; sourceTree = "<group>"; }; + DE928B7E0C0A615600231DA4 /* CodeGenModule.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = CodeGenModule.cpp; path = CodeGen/CodeGenModule.cpp; sourceTree = "<group>"; }; + DE928B800C0A615B00231DA4 /* CodeGenFunction.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = CodeGenFunction.h; path = CodeGen/CodeGenFunction.h; sourceTree = "<group>"; }; + 
DE928B820C0A616000231DA4 /* CodeGenFunction.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = CodeGenFunction.cpp; path = CodeGen/CodeGenFunction.cpp; sourceTree = "<group>"; }; + DEAEE98A0A5A2B970045101B /* MultipleIncludeOpt.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = MultipleIncludeOpt.h; sourceTree = "<group>"; }; + DEAEED4A0A5AF89A0045101B /* NOTES.txt */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = text; path = NOTES.txt; sourceTree = "<group>"; }; + DEB0AEB80C2087A700718A22 /* TextDiagnostics.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = TextDiagnostics.h; path = Driver/TextDiagnostics.h; sourceTree = "<group>"; }; + DEB0AEBA0C2087AB00718A22 /* TextDiagnostics.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = TextDiagnostics.cpp; path = Driver/TextDiagnostics.cpp; sourceTree = "<group>"; }; + DEC82DC30C32D50A00BAC245 /* DiagChecker.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = DiagChecker.cpp; path = Driver/DiagChecker.cpp; sourceTree = "<group>"; }; + DEC8D9900A9433CD00353FCA /* Decl.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = Decl.h; path = clang/AST/Decl.h; sourceTree = "<group>"; }; + DEC8D9A30A94346E00353FCA /* AST.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = AST.h; path = clang/AST/AST.h; sourceTree = "<group>"; }; + DED626C80AE0C065001E80A4 /* TargetInfo.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = TargetInfo.cpp; sourceTree = "<group>"; }; + DED627020AE0C51D001E80A4 /* Targets.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = Targets.cpp; path = Driver/Targets.cpp; sourceTree = 
"<group>"; }; + DED62ABA0AE2EDF1001E80A4 /* Decl.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = Decl.cpp; path = AST/Decl.cpp; sourceTree = "<group>"; usesTabs = 1; }; + DED676D00B6C786700AAD4A3 /* Builtins.def */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = text; name = Builtins.def; path = clang/AST/Builtins.def; sourceTree = "<group>"; }; + DED676F90B6C797B00AAD4A3 /* Builtins.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = Builtins.h; path = clang/AST/Builtins.h; sourceTree = "<group>"; }; + DED677C80B6C854100AAD4A3 /* Builtins.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = Builtins.cpp; path = AST/Builtins.cpp; sourceTree = "<group>"; }; + DED67AED0B6DB92A00AAD4A3 /* X86Builtins.def */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = text; name = X86Builtins.def; path = Driver/X86Builtins.def; sourceTree = "<group>"; }; + DED67AEF0B6DB92F00AAD4A3 /* PPCBuiltins.def */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = text; name = PPCBuiltins.def; path = Driver/PPCBuiltins.def; sourceTree = "<group>"; }; + DED7D7310A524295003AD0FB /* Diagnostic.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = Diagnostic.h; sourceTree = "<group>"; }; + DED7D7320A524295003AD0FB /* DiagnosticKinds.def */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = text; path = DiagnosticKinds.def; sourceTree = "<group>"; }; + DED7D7330A524295003AD0FB /* FileManager.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = FileManager.h; sourceTree = "<group>"; }; + DED7D7350A524295003AD0FB /* SourceLocation.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = SourceLocation.h; sourceTree = "<group>"; }; + DED7D7360A524295003AD0FB /* SourceManager.h */ = 
{isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = SourceManager.h; sourceTree = "<group>"; }; + DED7D7370A524295003AD0FB /* TokenKinds.def */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = text; path = TokenKinds.def; sourceTree = "<group>"; }; + DED7D7380A524295003AD0FB /* TokenKinds.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = TokenKinds.h; sourceTree = "<group>"; }; + DED7D73A0A524295003AD0FB /* IdentifierTable.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = IdentifierTable.h; sourceTree = "<group>"; }; + DED7D73B0A524295003AD0FB /* Lexer.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = Lexer.h; sourceTree = "<group>"; }; + DED7D73C0A524295003AD0FB /* LexerToken.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = LexerToken.h; sourceTree = "<group>"; }; + DED7D73D0A524295003AD0FB /* MacroExpander.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = MacroExpander.h; sourceTree = "<group>"; }; + DED7D73E0A524295003AD0FB /* MacroInfo.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = MacroInfo.h; sourceTree = "<group>"; }; + DED7D73F0A524295003AD0FB /* Pragma.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = Pragma.h; sourceTree = "<group>"; }; + DED7D7400A524295003AD0FB /* Preprocessor.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = Preprocessor.h; sourceTree = "<group>"; }; + DED7D75D0A5242C7003AD0FB /* Diagnostic.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = Diagnostic.cpp; sourceTree = "<group>"; }; + DED7D75E0A5242C7003AD0FB /* FileManager.cpp */ = {isa = PBXFileReference; fileEncoding = 30; 
lastKnownFileType = sourcecode.cpp.cpp; path = FileManager.cpp; sourceTree = "<group>"; }; + DED7D76D0A5242C7003AD0FB /* SourceManager.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = SourceManager.cpp; sourceTree = "<group>"; }; + DED7D76E0A5242C7003AD0FB /* TokenKinds.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = TokenKinds.cpp; sourceTree = "<group>"; }; + DED7D79D0A5242E6003AD0FB /* IdentifierTable.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = IdentifierTable.cpp; sourceTree = "<group>"; }; + DED7D79E0A5242E6003AD0FB /* Lexer.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = Lexer.cpp; sourceTree = "<group>"; }; + DED7D7A00A5242E6003AD0FB /* MacroInfo.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = MacroInfo.cpp; sourceTree = "<group>"; }; + DED7D7A20A5242E6003AD0FB /* PPExpressions.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = PPExpressions.cpp; sourceTree = "<group>"; }; + DED7D7A30A5242E6003AD0FB /* Pragma.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = Pragma.cpp; sourceTree = "<group>"; }; + DED7D7A40A5242E6003AD0FB /* Preprocessor.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = Preprocessor.cpp; sourceTree = "<group>"; }; + DED7D7D70A524302003AD0FB /* README.txt */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = text; path = README.txt; sourceTree = "<group>"; }; + DED7D9170A52518C003AD0FB /* ScratchBuffer.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = ScratchBuffer.h; sourceTree = "<group>"; }; + DED7D9E40A5257F6003AD0FB /* ScratchBuffer.cpp */ = {isa = PBXFileReference; 
fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = ScratchBuffer.cpp; sourceTree = "<group>"; }; + DEEBBD430C19C5D200A9FE82 /* TODO.txt */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = text; path = TODO.txt; sourceTree = "<group>"; }; + DEEBC3B90C2363B800A9FE82 /* CodeGenTypes.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = CodeGenTypes.h; path = CodeGen/CodeGenTypes.h; sourceTree = "<group>"; }; + DEEBC3BB0C2363BC00A9FE82 /* CodeGenTypes.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = CodeGenTypes.cpp; path = CodeGen/CodeGenTypes.cpp; sourceTree = "<group>"; }; + DEEBCBE20C33702C00A9FE82 /* TextDiagnosticBuffer.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = TextDiagnosticBuffer.h; path = Driver/TextDiagnosticBuffer.h; sourceTree = "<group>"; }; + DEEBCBE40C33703100A9FE82 /* TextDiagnosticBuffer.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = TextDiagnosticBuffer.cpp; path = Driver/TextDiagnosticBuffer.cpp; sourceTree = "<group>"; }; + F0226FD00C18084500141F42 /* TextDiagnosticPrinter.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = TextDiagnosticPrinter.cpp; path = Driver/TextDiagnosticPrinter.cpp; sourceTree = "<group>"; }; + F0226FD10C18084500141F42 /* TextDiagnosticPrinter.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = TextDiagnosticPrinter.h; path = Driver/TextDiagnosticPrinter.h; sourceTree = "<group>"; }; +/* End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + 8DD76F660486A84900D96B5E /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ 
+ 08FB7794FE84155DC02AAC07 /* clang */ = { + isa = PBXGroup; + children = ( + DED7D72E0A524295003AD0FB /* include */, + 08FB7795FE84155DC02AAC07 /* Source */, + DEAEECAE0A5AF0FA0045101B /* Driver */, + C6859E8C029090F304C91782 /* Documentation */, + 1AB674ADFE9D54B511CA2CBB /* Products */, + ); + name = clang; + sourceTree = "<group>"; + }; + 08FB7795FE84155DC02AAC07 /* Source */ = { + isa = PBXGroup; + children = ( + DED7D7500A5242C7003AD0FB /* Basic */, + DED7D78C0A5242E6003AD0FB /* Lex */, + DE1F22600A7D8C9B00FBF588 /* Parse */, + DEC8D9920A9433F400353FCA /* AST */, + DE67E7070C020EAB00F66BC5 /* Sema */, + DE927FCC0C0557CD00231DA4 /* CodeGen */, + ); + name = Source; + sourceTree = "<group>"; + }; + 1AB674ADFE9D54B511CA2CBB /* Products */ = { + isa = PBXGroup; + children = ( + 8DD76F6C0486A84900D96B5E /* clang */, + ); + name = Products; + sourceTree = "<group>"; + }; + C6859E8C029090F304C91782 /* Documentation */ = { + isa = PBXGroup; + children = ( + DEAEED4A0A5AF89A0045101B /* NOTES.txt */, + DED7D7D70A524302003AD0FB /* README.txt */, + DEEBBD430C19C5D200A9FE82 /* TODO.txt */, + ); + name = Documentation; + sourceTree = "<group>"; + }; + DE1F21F20A7D84E800FBF588 /* Parse */ = { + isa = PBXGroup; + children = ( + 84D9A88B0C1A581300AC7ABC /* AttributeList.h */, + DE06E8130A8FF9330050E87E /* Action.h */, + DE17336F0B068DC60080B521 /* DeclSpec.h */, + DE1F22020A7D852A00FBF588 /* Parser.h */, + DE06BECA0A854E4B0050E87E /* Scope.h */, + ); + name = Parse; + sourceTree = "<group>"; + }; + DE1F22600A7D8C9B00FBF588 /* Parse */ = { + isa = PBXGroup; + children = ( + 84D9A8870C1A57E100AC7ABC /* AttributeList.cpp */, + DE3461260AFE68BE00DBC861 /* MinimalAction.cpp */, + DE06D42F0A8BB52D0050E87E /* Parser.cpp */, + DE3460040AFDCC6500DBC861 /* ParseInit.cpp */, + DE34600E0AFDCCCE00DBC861 /* ParseDecl.cpp */, + DE3460120AFDCCDA00DBC861 /* ParseExpr.cpp */, + DE06756B0C051CFE00EBBFD8 /* ParseExprCXX.cpp */, + DE34600A0AFDCCBF00DBC861 /* ParseStmt.cpp */, + 
DE345FFF0AFDCC1900DBC861 /* ParseObjc.cpp */, + DE17336D0B068DC20080B521 /* DeclSpec.cpp */, + ); + name = Parse; + sourceTree = "<group>"; + }; + DE67E7070C020EAB00F66BC5 /* Sema */ = { + isa = PBXGroup; + children = ( + DE67E7190C020F4F00F66BC5 /* ASTStreamer.cpp */, + DE67E7140C020EDF00F66BC5 /* Sema.h */, + DE67E7160C020EE400F66BC5 /* Sema.cpp */, + DE67E7120C020ED900F66BC5 /* SemaDecl.cpp */, + DE67E7100C020ED400F66BC5 /* SemaExpr.cpp */, + DE67E70E0C020ECF00F66BC5 /* SemaExprCXX.cpp */, + DE67E70C0C020ECA00F66BC5 /* SemaStmt.cpp */, + DE67E70A0C020EC500F66BC5 /* SemaType.cpp */, + ); + name = Sema; + sourceTree = "<group>"; + }; + DE67E7260C02108300F66BC5 /* Sema */ = { + isa = PBXGroup; + children = ( + DE67E7270C02109800F66BC5 /* ASTStreamer.h */, + ); + name = Sema; + sourceTree = "<group>"; + }; + DE927FCC0C0557CD00231DA4 /* CodeGen */ = { + isa = PBXGroup; + children = ( + DE928B800C0A615B00231DA4 /* CodeGenFunction.h */, + DE928B820C0A616000231DA4 /* CodeGenFunction.cpp */, + DE928B7C0C0A615100231DA4 /* CodeGenModule.h */, + DE928B7E0C0A615600231DA4 /* CodeGenModule.cpp */, + DEEBC3BB0C2363BC00A9FE82 /* CodeGenTypes.cpp */, + DEEBC3B90C2363B800A9FE82 /* CodeGenTypes.h */, + DE4264FB0C113592005A861D /* CGDecl.cpp */, + DE4772FB0C10EAEC002239E8 /* CGExpr.cpp */, + DE4772F90C10EAE5002239E8 /* CGStmt.cpp */, + DE928B120C05659200231DA4 /* ModuleBuilder.cpp */, + ); + name = CodeGen; + sourceTree = "<group>"; + }; + DE928B140C05659A00231DA4 /* CodeGen */ = { + isa = PBXGroup; + children = ( + DE928B1F0C0565B000231DA4 /* ModuleBuilder.h */, + ); + name = CodeGen; + sourceTree = "<group>"; + }; + DEAEECAE0A5AF0FA0045101B /* Driver */ = { + isa = PBXGroup; + children = ( + DE5932CD0AD60FF400BC794C /* clang.cpp */, + DE5932CE0AD60FF400BC794C /* clang.h */, + DED67AEF0B6DB92F00AAD4A3 /* PPCBuiltins.def */, + DED67AED0B6DB92A00AAD4A3 /* X86Builtins.def */, + DEC82DC30C32D50A00BAC245 /* DiagChecker.cpp */, + DE927FFC0C055DE900231DA4 /* LLVMCodegen.cpp */, + 
DE5932CF0AD60FF400BC794C /* PrintParserCallbacks.cpp */, + DE5932D00AD60FF400BC794C /* PrintPreprocessedOutput.cpp */, + DED627020AE0C51D001E80A4 /* Targets.cpp */, + DEB0AEBA0C2087AB00718A22 /* TextDiagnostics.cpp */, + DEB0AEB80C2087A700718A22 /* TextDiagnostics.h */, + F0226FD00C18084500141F42 /* TextDiagnosticPrinter.cpp */, + F0226FD10C18084500141F42 /* TextDiagnosticPrinter.h */, + DEEBCBE40C33703100A9FE82 /* TextDiagnosticBuffer.cpp */, + DEEBCBE20C33702C00A9FE82 /* TextDiagnosticBuffer.h */, + ); + name = Driver; + sourceTree = "<group>"; + }; + DEC8D98B0A9433BC00353FCA /* AST */ = { + isa = PBXGroup; + children = ( + DEC8D9A30A94346E00353FCA /* AST.h */, + DE75ED280B044DC90020CF81 /* ASTContext.h */, + DED676D00B6C786700AAD4A3 /* Builtins.def */, + DED676F90B6C797B00AAD4A3 /* Builtins.h */, + DEC8D9900A9433CD00353FCA /* Decl.h */, + DE0FCA620A95859D00248FD5 /* Expr.h */, + 1A30A9E80B93A4C800201A91 /* ExprCXX.h */, + DE3452800AEF1B1800DBC861 /* Stmt.h */, + DE345F210AFD347900DBC861 /* StmtNodes.def */, + DE345C190AFC658B00DBC861 /* StmtVisitor.h */, + DE3464210B03040900DBC861 /* Type.h */, + ); + name = AST; + sourceTree = "<group>"; + }; + DEC8D9920A9433F400353FCA /* AST */ = { + isa = PBXGroup; + children = ( + DE1732FF0B068B700080B521 /* ASTContext.cpp */, + DED677C80B6C854100AAD4A3 /* Builtins.cpp */, + DED62ABA0AE2EDF1001E80A4 /* Decl.cpp */, + DE0FCB330A9C21F100248FD5 /* Expr.cpp */, + DE3452400AEF1A2D00DBC861 /* Stmt.cpp */, + DE75EDF00B06880E0020CF81 /* Type.cpp */, + DE34621C0AFEB19B00DBC861 /* StmtPrinter.cpp */, + DE345C560AFC69E800DBC861 /* StmtVisitor.cpp */, + ); + name = AST; + sourceTree = "<group>"; + }; + DED7D72E0A524295003AD0FB /* include */ = { + isa = PBXGroup; + children = ( + DED7D7300A524295003AD0FB /* Basic */, + DED7D7390A524295003AD0FB /* Lex */, + DE1F21F20A7D84E800FBF588 /* Parse */, + DEC8D98B0A9433BC00353FCA /* AST */, + DE67E7260C02108300F66BC5 /* Sema */, + DE928B140C05659A00231DA4 /* CodeGen */, + ); + path = include; + 
sourceTree = "<group>"; + }; + DED7D7300A524295003AD0FB /* Basic */ = { + isa = PBXGroup; + children = ( + DED7D7310A524295003AD0FB /* Diagnostic.h */, + DED7D7320A524295003AD0FB /* DiagnosticKinds.def */, + DED7D7330A524295003AD0FB /* FileManager.h */, + DE06B73D0A8307640050E87E /* LangOptions.h */, + DED7D7350A524295003AD0FB /* SourceLocation.h */, + DED7D7360A524295003AD0FB /* SourceManager.h */, + DE46BF270AE0A82D00CC047C /* TargetInfo.h */, + DED7D7370A524295003AD0FB /* TokenKinds.def */, + DED7D7380A524295003AD0FB /* TokenKinds.h */, + ); + name = Basic; + path = clang/Basic; + sourceTree = "<group>"; + }; + DED7D7390A524295003AD0FB /* Lex */ = { + isa = PBXGroup; + children = ( + DE3450D60AEB543100DBC861 /* DirectoryLookup.h */, + DE344AB70AE5DF6D00DBC861 /* HeaderSearch.h */, + DED7D73A0A524295003AD0FB /* IdentifierTable.h */, + DED7D73B0A524295003AD0FB /* Lexer.h */, + DED7D73C0A524295003AD0FB /* LexerToken.h */, + 1A869A6E0BA2164C008DA07A /* LiteralSupport.h */, + DED7D73D0A524295003AD0FB /* MacroExpander.h */, + DED7D73E0A524295003AD0FB /* MacroInfo.h */, + DEAEE98A0A5A2B970045101B /* MultipleIncludeOpt.h */, + DE01DA480B12ADA300AC22CE /* PPCallbacks.h */, + DED7D73F0A524295003AD0FB /* Pragma.h */, + DED7D7400A524295003AD0FB /* Preprocessor.h */, + DED7D9170A52518C003AD0FB /* ScratchBuffer.h */, + ); + name = Lex; + path = clang/Lex; + sourceTree = "<group>"; + }; + DED7D7500A5242C7003AD0FB /* Basic */ = { + isa = PBXGroup; + children = ( + DED7D75D0A5242C7003AD0FB /* Diagnostic.cpp */, + DED7D75E0A5242C7003AD0FB /* FileManager.cpp */, + DED7D76D0A5242C7003AD0FB /* SourceManager.cpp */, + DED7D76E0A5242C7003AD0FB /* TokenKinds.cpp */, + DED626C80AE0C065001E80A4 /* TargetInfo.cpp */, + ); + path = Basic; + sourceTree = "<group>"; + }; + DED7D78C0A5242E6003AD0FB /* Lex */ = { + isa = PBXGroup; + children = ( + DE344B530AE5E46C00DBC861 /* HeaderSearch.cpp */, + DED7D79D0A5242E6003AD0FB /* IdentifierTable.cpp */, + DED7D79E0A5242E6003AD0FB /* Lexer.cpp */, + 
1A869AA70BA21ABA008DA07A /* LiteralSupport.cpp */, + DE3451570AEC176100DBC861 /* MacroExpander.cpp */, + DED7D7A00A5242E6003AD0FB /* MacroInfo.cpp */, + DED7D7A20A5242E6003AD0FB /* PPExpressions.cpp */, + DED7D7A30A5242E6003AD0FB /* Pragma.cpp */, + DED7D7A40A5242E6003AD0FB /* Preprocessor.cpp */, + DED7D9E40A5257F6003AD0FB /* ScratchBuffer.cpp */, + ); + path = Lex; + sourceTree = "<group>"; + }; +/* End PBXGroup section */ + +/* Begin PBXNativeTarget section */ + 8DD76F620486A84900D96B5E /* clang */ = { + isa = PBXNativeTarget; + buildConfigurationList = 1DEB923108733DC60010E9CD /* Build configuration list for PBXNativeTarget "clang" */; + buildPhases = ( + 8DD76F640486A84900D96B5E /* Sources */, + 8DD76F660486A84900D96B5E /* Frameworks */, + 8DD76F690486A84900D96B5E /* CopyFiles */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = clang; + productInstallPath = "$(HOME)/bin"; + productName = clang; + productReference = 8DD76F6C0486A84900D96B5E /* clang */; + productType = "com.apple.product-type.tool"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 08FB7793FE84155DC02AAC07 /* Project object */ = { + isa = PBXProject; + buildConfigurationList = 1DEB923508733DC60010E9CD /* Build configuration list for PBXProject "clang" */; + hasScannedForEncodings = 1; + mainGroup = 08FB7794FE84155DC02AAC07 /* clang */; + projectDirPath = ""; + targets = ( + 8DD76F620486A84900D96B5E /* clang */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXSourcesBuildPhase section */ + 8DD76F640486A84900D96B5E /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + DED7D77A0A5242C7003AD0FB /* Diagnostic.cpp in Sources */, + DED7D77B0A5242C7003AD0FB /* FileManager.cpp in Sources */, + DED7D7890A5242C7003AD0FB /* SourceManager.cpp in Sources */, + DED7D78A0A5242C7003AD0FB /* TokenKinds.cpp in Sources */, + DED7D7C20A5242E6003AD0FB /* IdentifierTable.cpp in Sources */, + DED7D7C30A5242E6003AD0FB /* Lexer.cpp in 
Sources */, + DED7D7C50A5242E6003AD0FB /* MacroInfo.cpp in Sources */, + DED7D7C70A5242E6003AD0FB /* PPExpressions.cpp in Sources */, + DED7D7C80A5242E6003AD0FB /* Pragma.cpp in Sources */, + DED7D7C90A5242E6003AD0FB /* Preprocessor.cpp in Sources */, + DED7D9E50A5257F6003AD0FB /* ScratchBuffer.cpp in Sources */, + DE06D4310A8BB52D0050E87E /* Parser.cpp in Sources */, + DE0FCB340A9C21F100248FD5 /* Expr.cpp in Sources */, + DE5932D10AD60FF400BC794C /* clang.cpp in Sources */, + DE5932D30AD60FF400BC794C /* PrintParserCallbacks.cpp in Sources */, + DE5932D40AD60FF400BC794C /* PrintPreprocessedOutput.cpp in Sources */, + DED626C90AE0C065001E80A4 /* TargetInfo.cpp in Sources */, + DED627030AE0C51D001E80A4 /* Targets.cpp in Sources */, + DED62ABB0AE2EDF1001E80A4 /* Decl.cpp in Sources */, + DE344B540AE5E46C00DBC861 /* HeaderSearch.cpp in Sources */, + DE3451580AEC176100DBC861 /* MacroExpander.cpp in Sources */, + DE3452410AEF1A2D00DBC861 /* Stmt.cpp in Sources */, + DE345C570AFC69E800DBC861 /* StmtVisitor.cpp in Sources */, + DE3460000AFDCC1900DBC861 /* ParseObjc.cpp in Sources */, + DE3460050AFDCC6500DBC861 /* ParseInit.cpp in Sources */, + DE34600B0AFDCCBF00DBC861 /* ParseStmt.cpp in Sources */, + DE34600F0AFDCCCE00DBC861 /* ParseDecl.cpp in Sources */, + DE3460130AFDCCDA00DBC861 /* ParseExpr.cpp in Sources */, + DE3461270AFE68BE00DBC861 /* MinimalAction.cpp in Sources */, + DE34621D0AFEB19B00DBC861 /* StmtPrinter.cpp in Sources */, + DE75EDF10B06880E0020CF81 /* Type.cpp in Sources */, + DE1733000B068B700080B521 /* ASTContext.cpp in Sources */, + DE17336E0B068DC20080B521 /* DeclSpec.cpp in Sources */, + DED677C90B6C854100AAD4A3 /* Builtins.cpp in Sources */, + 1A869AA80BA21ABA008DA07A /* LiteralSupport.cpp in Sources */, + DE67E70B0C020EC500F66BC5 /* SemaType.cpp in Sources */, + DE67E70D0C020ECA00F66BC5 /* SemaStmt.cpp in Sources */, + DE67E70F0C020ECF00F66BC5 /* SemaExprCXX.cpp in Sources */, + DE67E7110C020ED400F66BC5 /* SemaExpr.cpp in Sources */, + 
DE67E7130C020ED900F66BC5 /* SemaDecl.cpp in Sources */, + DE67E7170C020EE400F66BC5 /* Sema.cpp in Sources */, + DE67E71A0C020F4F00F66BC5 /* ASTStreamer.cpp in Sources */, + DE06756C0C051CFE00EBBFD8 /* ParseExprCXX.cpp in Sources */, + DE927FFD0C055DE900231DA4 /* LLVMCodegen.cpp in Sources */, + DE928B130C05659200231DA4 /* ModuleBuilder.cpp in Sources */, + DE928B7F0C0A615600231DA4 /* CodeGenModule.cpp in Sources */, + DE928B830C0A616000231DA4 /* CodeGenFunction.cpp in Sources */, + DE4772FA0C10EAE5002239E8 /* CGStmt.cpp in Sources */, + DE4772FC0C10EAEC002239E8 /* CGExpr.cpp in Sources */, + DE4264FC0C113592005A861D /* CGDecl.cpp in Sources */, + F0226FD20C18084500141F42 /* TextDiagnosticPrinter.cpp in Sources */, + 84D9A8880C1A57E100AC7ABC /* AttributeList.cpp in Sources */, + DEB0AEBB0C2087AB00718A22 /* TextDiagnostics.cpp in Sources */, + DEEBC3BC0C2363BC00A9FE82 /* CodeGenTypes.cpp in Sources */, + DEC82DC40C32D50A00BAC245 /* DiagChecker.cpp in Sources */, + DEEBCBE50C33703100A9FE82 /* TextDiagnosticBuffer.cpp in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin XCBuildConfiguration section */ + 1DEB923208733DC60010E9CD /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ARCHS = i386; + COPY_PHASE_STRIP = NO; + GCC_CW_ASM_SYNTAX = NO; + GCC_DYNAMIC_NO_PIC = NO; + GCC_ENABLE_CPP_EXCEPTIONS = NO; + GCC_ENABLE_CPP_RTTI = NO; + GCC_ENABLE_FIX_AND_CONTINUE = NO; + GCC_ENABLE_PASCAL_STRINGS = NO; + GCC_ENABLE_SYMBOL_SEPARATION = NO; + GCC_GENERATE_DEBUGGING_SYMBOLS = YES; + GCC_MODEL_TUNING = G5; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PREPROCESSOR_DEFINITIONS = "__STDC_LIMIT_MACROS=1"; + GCC_STRICT_ALIASING = YES; + GCC_THREADSAFE_STATICS = NO; + GCC_USE_GCC3_PFE_SUPPORT = NO; + HEADER_SEARCH_PATHS = ( + "~/llvm/tools/clang/include", + "~/llvm/include", + "$(HEADER_SEARCH_PATHS)", + ); + INSTALL_PATH = "$(HOME)/bin"; + LIBRARY_SEARCH_PATHS = "~/llvm/Debug/lib"; + OTHER_LDFLAGS = ( + 
"-lLLVMSupport", + "-lLLVMSystem", + ); + PRECOMPS_INCLUDE_HEADERS_FROM_BUILT_PRODUCTS_DIR = NO; + PRODUCT_NAME = clang; + ZERO_LINK = NO; + }; + name = Debug; + }; + 1DEB923308733DC60010E9CD /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ARCHS = i386; + GCC_CW_ASM_SYNTAX = NO; + GCC_ENABLE_CPP_EXCEPTIONS = NO; + GCC_ENABLE_CPP_RTTI = NO; + GCC_ENABLE_FIX_AND_CONTINUE = NO; + GCC_ENABLE_PASCAL_STRINGS = NO; + GCC_ENABLE_SYMBOL_SEPARATION = NO; + GCC_GENERATE_DEBUGGING_SYMBOLS = YES; + GCC_MODEL_TUNING = G5; + GCC_PREPROCESSOR_DEFINITIONS = "__STDC_LIMIT_MACROS=1"; + GCC_STRICT_ALIASING = YES; + GCC_THREADSAFE_STATICS = NO; + GCC_USE_GCC3_PFE_SUPPORT = NO; + HEADER_SEARCH_PATHS = ( + "~/llvm/tools/clang/include", + "~/llvm/include", + "$(HEADER_SEARCH_PATHS)", + ); + INSTALL_PATH = "$(HOME)/bin"; + LIBRARY_SEARCH_PATHS = "~/llvm/Debug/lib"; + OTHER_LDFLAGS = ( + "-lLLVMSupport", + "-lLLVMSystem", + ); + PRECOMPS_INCLUDE_HEADERS_FROM_BUILT_PRODUCTS_DIR = NO; + PRODUCT_NAME = clang; + ZERO_LINK = NO; + }; + name = Release; + }; + 1DEB923608733DC60010E9CD /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + GCC_WARN_ABOUT_RETURN_TYPE = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + PREBINDING = NO; + SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk; + }; + name = Debug; + }; + 1DEB923708733DC60010E9CD /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + GCC_WARN_ABOUT_RETURN_TYPE = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + PREBINDING = NO; + SDKROOT = /Developer/SDKs/MacOSX10.4u.sdk; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList section */ + 1DEB923108733DC60010E9CD /* Build configuration list for PBXNativeTarget "clang" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 1DEB923208733DC60010E9CD /* Debug */, + 1DEB923308733DC60010E9CD /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 1DEB923508733DC60010E9CD /* 
Build configuration list for PBXProject "clang" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 1DEB923608733DC60010E9CD /* Debug */, + 1DEB923708733DC60010E9CD /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 08FB7793FE84155DC02AAC07 /* Project object */; +} diff --git a/include/clang/AST/AST.h b/include/clang/AST/AST.h new file mode 100644 index 0000000000..a185692985 --- /dev/null +++ b/include/clang/AST/AST.h @@ -0,0 +1,24 @@ +//===--- AST.h - "Umbrella" header for AST library --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface to the AST classes. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_AST_H +#define LLVM_CLANG_AST_AST_H + +// This header exports all AST interfaces. +#include "clang/AST/ASTContext.h" +#include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/AST/Type.h" +#include "clang/AST/StmtVisitor.h" + +#endif diff --git a/include/clang/AST/ASTContext.h b/include/clang/AST/ASTContext.h new file mode 100644 index 0000000000..759940dfdd --- /dev/null +++ b/include/clang/AST/ASTContext.h @@ -0,0 +1,128 @@ +//===--- ASTContext.h - Context to hold long-lived AST nodes ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the ASTContext interface. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_ASTCONTEXT_H +#define LLVM_CLANG_AST_ASTCONTEXT_H + +#include "clang/AST/Builtins.h" +#include "clang/AST/Type.h" +#include "clang/AST/Expr.h" +#include <vector> + +namespace clang { + class TargetInfo; + +/// ASTContext - This class holds long-lived AST nodes (such as types and +/// decls) that can be referred to throughout the semantic analysis of a file. +class ASTContext { + std::vector<Type*> Types; + llvm::FoldingSet<ComplexType> ComplexTypes; + llvm::FoldingSet<PointerType> PointerTypes; + llvm::FoldingSet<ReferenceType> ReferenceTypes; + llvm::FoldingSet<ArrayType> ArrayTypes; + llvm::FoldingSet<VectorType> VectorTypes; + llvm::FoldingSet<FunctionTypeNoProto> FunctionTypeNoProtos; + llvm::FoldingSet<FunctionTypeProto> FunctionTypeProtos; +public: + TargetInfo &Target; + Builtin::Context BuiltinInfo; + + // Builtin Types. + QualType VoidTy; + QualType BoolTy; + QualType CharTy; + QualType SignedCharTy, ShortTy, IntTy, LongTy, LongLongTy; + QualType UnsignedCharTy, UnsignedShortTy, UnsignedIntTy, UnsignedLongTy; + QualType UnsignedLongLongTy; + QualType FloatTy, DoubleTy, LongDoubleTy; + QualType FloatComplexTy, DoubleComplexTy, LongDoubleComplexTy; + + ASTContext(TargetInfo &t, IdentifierTable &idents) : Target(t) { + InitBuiltinTypes(); + BuiltinInfo.InitializeBuiltins(idents, Target); + } + ~ASTContext(); + + void PrintStats() const; + + /// getComplexType - Return the uniqued reference to the type for a complex + /// number with the specified element type. + QualType getComplexType(QualType T); + + /// getPointerType - Return the uniqued reference to the type for a pointer to + /// the specified type. + QualType getPointerType(QualType T); + + /// getReferenceType - Return the uniqued reference to the type for a + /// reference to the specified type. 
+ QualType getReferenceType(QualType T); + + /// getArrayType - Return the unique reference to the type for an array of the + /// specified element type. + QualType getArrayType(QualType EltTy, ArrayType::ArraySizeModifier ASM, + unsigned EltTypeQuals, Expr *NumElts); + + /// convertToVectorType - Return the unique reference to a vector type of + /// the specified element type and size. VectorType can be a pointer, array, + /// function, or built-in type (i.e. _Bool, integer, or float). + QualType convertToVectorType(QualType VectorType, unsigned NumElts); + + /// getFunctionTypeNoProto - Return a K&R style C function type like 'int()'. + /// + QualType getFunctionTypeNoProto(QualType ResultTy); + + /// getFunctionType - Return a normal function type with a typed argument + /// list. isVariadic indicates whether the argument list includes '...'. + QualType getFunctionType(QualType ResultTy, QualType *ArgArray, + unsigned NumArgs, bool isVariadic); + + /// getTypedefType - Return the unique reference to the type for the + /// specified typename decl. + QualType getTypedefType(TypedefDecl *Decl); + + /// getTagDeclType - Return the unique reference to the type for the + /// specified TagDecl (struct/union/class/enum) decl. + QualType getTagDeclType(TagDecl *Decl); + + /// getSizeType - Return the unique type for "size_t" (C99 7.17), defined + /// in <stddef.h>. The sizeof operator requires this (C99 6.5.3.4p4). + QualType getSizeType() const; + + /// getIntegerBitwidth - Return the bitwidth of the specified integer type + /// according to the target. 'Loc' specifies the source location that + /// requires evaluation of this property. + unsigned getIntegerBitwidth(QualType T, SourceLocation Loc); + + // maxIntegerType - Returns the highest ranked integer type. Handles 3 + // different type combos: unsigned/unsigned, signed/signed, signed/unsigned. 
+ static QualType maxIntegerType(QualType lhs, QualType rhs); + + // maxFloatingType - Returns the highest ranked float type. Both input + // types are required to be floats. + static QualType maxFloatingType(QualType lt, QualType rt); + + // maxComplexType - Returns the highest ranked complex type. Handles 3 + // different type combos: complex/complex, complex/float, float/complex. + QualType maxComplexType(QualType lt, QualType rt) const; + +private: + ASTContext(const ASTContext&); // DO NOT IMPLEMENT + void operator=(const ASTContext&); // DO NOT IMPLEMENT + + void InitBuiltinTypes(); + void InitBuiltinType(QualType &R, BuiltinType::Kind K); +}; + +} // end namespace clang + +#endif diff --git a/include/clang/AST/Builtins.def b/include/clang/AST/Builtins.def new file mode 100644 index 0000000000..97431da951 --- /dev/null +++ b/include/clang/AST/Builtins.def @@ -0,0 +1,53 @@ +//===--- Builtins.def - Builtin function info database ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the standard builtin function database. Users of this file +// must define the BUILTIN macro to make use of this information. +// +//===----------------------------------------------------------------------===// + +// FIXME: this needs to be the full list supported by GCC. Right now, I'm just +// adding stuff on demand. +// +// FIXME: This should really be a .td file, but that requires modifying tblgen. +// Perhaps tblgen should have plugins. + +// The first value provided to the macro specifies the function name of the +// builtin, and results in a clang::builtin::BIXX enum value for XX. 
+ +// The second value provided to the macro specifies the type of the function +// (result value, then each argument) as follows: +// v -> void +// c -> char +// s -> short +// i -> int +// f -> float +// d -> double +// . -> "...". This may only occur at the end of the function list. +// +// Types maybe prefixed with the following modifiers: +// L -> long (e.g. Li for 'long int') +// LL -> long long +// S -> signed +// U -> unsigned + +// The third value provided to the macro specifies information about attributes +// of the function. Currently we have: +// n -> nothrow +// c -> const + +BUILTIN(__builtin_inf , "d" , "nc") +BUILTIN(__builtin_inff , "f" , "nc") +BUILTIN(__builtin_infl , "Ld" , "nc") +BUILTIN(__builtin_fabs , "dd" , "nc") +BUILTIN(__builtin_fabsf, "ff" , "nc") +BUILTIN(__builtin_fabsl, "LdLd", "nc") +BUILTIN(__builtin_constant_p, "UsUs", "nc") + +#undef BUILTIN diff --git a/include/clang/AST/Builtins.h b/include/clang/AST/Builtins.h new file mode 100644 index 0000000000..682031f7be --- /dev/null +++ b/include/clang/AST/Builtins.h @@ -0,0 +1,72 @@ +//===--- Builtins.h - Builtin function header -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines enum values for all the target-independent builtin +// functions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_BUILTINS_H +#define LLVM_CLANG_AST_BUILTINS_H + +#include <cstring> + +namespace clang { + class TargetInfo; + class IdentifierTable; + class ASTContext; + class QualType; + +namespace Builtin { +enum ID { + NotBuiltin = 0, // This is not a builtin function. 
+#define BUILTIN(ID, TYPE, ATTRS) BI##ID, +#include "clang/AST/Builtins.def" + FirstTSBuiltin +}; + +struct Info { + const char *Name, *Type, *Attributes; + + bool operator==(const Info &RHS) const { + return !strcmp(Name, RHS.Name) && + !strcmp(Type, RHS.Type) && + !strcmp(Attributes, RHS.Attributes); + } + bool operator!=(const Info &RHS) const { return !(*this == RHS); } +}; + +/// Builtin::Context - This holds information about target-independent and +/// target-specific builtins, allowing easy queries by clients. +class Context { + const Info *TSRecords; + unsigned NumTSRecords; +public: + Context() : TSRecords(0), NumTSRecords(0) {} + + /// InitializeBuiltins - Mark the identifiers for all the builtins with their + /// appropriate builtin ID # and mark any non-portable builtin identifiers as + /// such. + void InitializeBuiltins(IdentifierTable &Table, const TargetInfo &Target); + + /// Builtin::GetName - Return the identifier name for the specified builtin, + /// e.g. "__builtin_abs". + const char *GetName(unsigned ID) const { + return GetRecord(ID).Name; + } + + /// GetBuiltinType - Return the type for the specified builtin. + QualType GetBuiltinType(unsigned ID, ASTContext &Context) const; +private: + const Info &GetRecord(unsigned ID) const; +}; + +} +} // end namespace clang +#endif diff --git a/include/clang/AST/Decl.h b/include/clang/AST/Decl.h new file mode 100644 index 0000000000..2d3b01d3ce --- /dev/null +++ b/include/clang/AST/Decl.h @@ -0,0 +1,442 @@ +//===--- Decl.h - Classes for representing declarations ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Decl interface and subclasses. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_DECL_H +#define LLVM_CLANG_AST_DECL_H + +#include "clang/Basic/SourceLocation.h" +#include "clang/AST/Type.h" +#include "llvm/ADT/APSInt.h" + +namespace clang { +class IdentifierInfo; +class Expr; +class Stmt; +class FunctionDecl; + + +/// Decl - This represents one declaration (or definition), e.g. a variable, +/// typedef, function, struct, etc. +/// +class Decl { +public: + enum Kind { + // Concrete sub-classes of ValueDecl + Function, BlockVariable, FileVariable, ParmVariable, EnumConstant, + // Concrete sub-classes of TypeDecl + Typedef, Struct, Union, Class, Enum, + // Concrete sub-class of Decl + Field + }; + + /// IdentifierNamespace - According to C99 6.2.3, there are four namespaces, + /// labels, tags, members and ordinary identifiers. + enum IdentifierNamespace { + IDNS_Label, + IDNS_Tag, + IDNS_Member, + IDNS_Ordinary + }; +private: + /// DeclKind - This indicates which class this is. + Kind DeclKind; + + /// Loc - The location that this decl. + SourceLocation Loc; + + /// Identifier - The identifier for this declaration (e.g. the name for the + /// variable, the tag for a struct). + IdentifierInfo *Identifier; + + /// When this decl is in scope while parsing, the Next field contains a + /// pointer to the shadowed decl of the same name. When the scope is popped, + /// Decls are relinked onto a containing decl object. + /// + Decl *Next; + + /// NextDeclarator - If this decl was part of a multi-declarator declaration, + /// such as "int X, Y, *Z;" this indicates Decl for the next declarator. 
+ Decl *NextDeclarator; + +protected: + Decl(Kind DK, SourceLocation L, IdentifierInfo *Id, Decl *NextDecl) + : DeclKind(DK), Loc(L), Identifier(Id), Next(0), NextDeclarator(NextDecl) { + if (Decl::CollectingStats()) addDeclKind(DK); + } + virtual ~Decl(); + +public: + IdentifierInfo *getIdentifier() const { return Identifier; } + SourceLocation getLocation() const { return Loc; } + void setLocation(SourceLocation L) { Loc = L; } + const char *getName() const; + + Kind getKind() const { return DeclKind; } + Decl *getNext() const { return Next; } + void setNext(Decl *N) { Next = N; } + + /// getNextDeclarator - If this decl was part of a multi-declarator + /// declaration, such as "int X, Y, *Z;" this returns the decl for the next + /// declarator. Otherwise it returns null. + Decl *getNextDeclarator() { return NextDeclarator; } + const Decl *getNextDeclarator() const { return NextDeclarator; } + void setNextDeclarator(Decl *N) { NextDeclarator = N; } + + IdentifierNamespace getIdentifierNamespace() const { + switch (DeclKind) { + default: assert(0 && "Unknown decl kind!"); + case Typedef: + case Function: + case BlockVariable: + case FileVariable: + case ParmVariable: + case EnumConstant: + return IDNS_Ordinary; + case Struct: + case Union: + case Class: + case Enum: + return IDNS_Tag; + } + } + // global temp stats (until we have a per-module visitor) + static void addDeclKind(const Kind k); + static bool CollectingStats(bool enable=false); + static void PrintStats(); + + // Implement isa/cast/dyncast/etc. + static bool classof(const Decl *) { return true; } +}; + +/// ValueDecl - Represent the declaration of a variable (in which case it is +/// an lvalue) a function (in which case it is a function designator) or +/// an enum constant. 
+class ValueDecl : public Decl { + QualType DeclType; +protected: + ValueDecl(Kind DK, SourceLocation L, IdentifierInfo *Id, QualType T, + Decl *PrevDecl) : Decl(DK, L, Id, PrevDecl), DeclType(T) {} +public: + QualType getType() const { return DeclType; } + void setType(QualType newType) { DeclType = newType; } + QualType getCanonicalType() const { return DeclType.getCanonicalType(); } + + // Implement isa/cast/dyncast/etc. + static bool classof(const Decl *D) { + return D->getKind() >= Function && D->getKind() <= EnumConstant; + } + static bool classof(const ValueDecl *D) { return true; } +}; + +/// VarDecl - An instance of this class is created to represent a variable +/// declaration or definition. +class VarDecl : public ValueDecl { +public: + enum StorageClass { + None, Extern, Static, Auto, Register + }; + StorageClass getStorageClass() const { return SClass; } + + // Implement isa/cast/dyncast/etc. + static bool classof(const Decl *D) { + return D->getKind() >= BlockVariable && D->getKind() <= ParmVariable; + } + static bool classof(const VarDecl *D) { return true; } +protected: + VarDecl(Kind DK, SourceLocation L, IdentifierInfo *Id, QualType T, + StorageClass SC, Decl *PrevDecl) + : ValueDecl(DK, L, Id, T, PrevDecl) { SClass = SC; } +private: + StorageClass SClass; + // TODO: Initializer. +}; + +/// BlockVarDecl - Represent a local variable declaration. +class BlockVarDecl : public VarDecl { +public: + BlockVarDecl(SourceLocation L, IdentifierInfo *Id, QualType T, StorageClass S, + Decl *PrevDecl) + : VarDecl(BlockVariable, L, Id, T, S, PrevDecl) {} + + // Implement isa/cast/dyncast/etc. + static bool classof(const Decl *D) { return D->getKind() == BlockVariable; } + static bool classof(const BlockVarDecl *D) { return true; } +}; + +/// FileVarDecl - Represent a file scoped variable declaration. 
This +/// will allow us to reason about external variable declarations and tentative +/// definitions (C99 6.9.2p2) using our type system (without storing a +/// pointer to the decl's scope, which is transient). +class FileVarDecl : public VarDecl { +public: + FileVarDecl(SourceLocation L, IdentifierInfo *Id, QualType T, StorageClass S, + Decl *PrevDecl) + : VarDecl(FileVariable, L, Id, T, S, PrevDecl) {} + + // Implement isa/cast/dyncast/etc. + static bool classof(const Decl *D) { return D->getKind() == FileVariable; } + static bool classof(const FileVarDecl *D) { return true; } +}; + +/// ParmVarDecl - Represent a parameter to a function. +class ParmVarDecl : public VarDecl { +public: + ParmVarDecl(SourceLocation L, IdentifierInfo *Id, QualType T, StorageClass S, + Decl *PrevDecl) + : VarDecl(ParmVariable, L, Id, T, S, PrevDecl) {} + + // Implement isa/cast/dyncast/etc. + static bool classof(const Decl *D) { return D->getKind() == ParmVariable; } + static bool classof(const ParmVarDecl *D) { return true; } +}; + +/// FunctionDecl - An instance of this class is created to represent a function +/// declaration or definition. 
+class FunctionDecl : public ValueDecl { +public: + enum StorageClass { + None, Extern, Static + }; + FunctionDecl(SourceLocation L, IdentifierInfo *Id, QualType T, + StorageClass S = None, Decl *PrevDecl) + : ValueDecl(Function, L, Id, T, PrevDecl), + ParamInfo(0), Body(0), DeclChain(0), SClass(S) {} + virtual ~FunctionDecl(); + + Stmt *getBody() const { return Body; } + void setBody(Stmt *B) { Body = B; } + + Decl *getDeclChain() const { return DeclChain; } + void setDeclChain(Decl *D) { DeclChain = D; } + + unsigned getNumParams() const; + const ParmVarDecl *getParamDecl(unsigned i) const { + assert(i < getNumParams() && "Illegal param #"); + return ParamInfo[i]; + } + ParmVarDecl *getParamDecl(unsigned i) { + assert(i < getNumParams() && "Illegal param #"); + return ParamInfo[i]; + } + void setParams(ParmVarDecl **NewParamInfo, unsigned NumParams); + + QualType getResultType() const { + return cast<FunctionType>(getType())->getResultType(); + } + StorageClass getStorageClass() const { return SClass; } + + // Implement isa/cast/dyncast/etc. + static bool classof(const Decl *D) { return D->getKind() == Function; } + static bool classof(const FunctionDecl *D) { return true; } +private: + /// ParamInfo - new[]'d array of pointers to VarDecls for the formal + /// parameters of this function. This is null if a prototype or if there are + /// no formals. TODO: we could allocate this space immediately after the + /// FunctionDecl object to save an allocation like FunctionType does. + ParmVarDecl **ParamInfo; + + Stmt *Body; // Null if a prototype. + + /// DeclChain - Linked list of declarations that are defined inside this + /// function. + Decl *DeclChain; + + StorageClass SClass; +}; + + +/// FieldDecl - An instance of this class is created by Sema::ParseField to +/// represent a member of a struct/union/class. 
+class FieldDecl : public Decl { + QualType DeclType; +public: + FieldDecl(SourceLocation L, IdentifierInfo *Id, QualType T, Decl *PrevDecl) + : Decl(Field, L, Id, PrevDecl), DeclType(T) {} + + QualType getType() const { return DeclType; } + QualType getCanonicalType() const { return DeclType.getCanonicalType(); } + + // Implement isa/cast/dyncast/etc. + static bool classof(const Decl *D) { + return D->getKind() == Field; + } + static bool classof(const FieldDecl *D) { return true; } +}; + +/// EnumConstantDecl - An instance of this object exists for each enum constant +/// that is defined. For example, in "enum X {a,b}", each of a/b are +/// EnumConstantDecl's, X is an instance of EnumDecl, and the type of a/b is a +/// TagType for the X EnumDecl. +class EnumConstantDecl : public ValueDecl { + Expr *Init; // an integer constant expression + llvm::APSInt Val; // The value. +public: + EnumConstantDecl(SourceLocation L, IdentifierInfo *Id, QualType T, Expr *E, + const llvm::APSInt &V, Decl *PrevDecl) + : ValueDecl(EnumConstant, L, Id, T, PrevDecl), Init(E), Val(V) {} + + const Expr *getInitExpr() const { return Init; } + Expr *getInitExpr() { return Init; } + const llvm::APSInt &getInitVal() const { return Val; } + + // Implement isa/cast/dyncast/etc. + static bool classof(const Decl *D) { + return D->getKind() == EnumConstant; + } + static bool classof(const EnumConstantDecl *D) { return true; } +}; + + +/// TypeDecl - Represents a declaration of a type. +/// +class TypeDecl : public Decl { + /// TypeForDecl - This indicates the Type object that represents this + /// TypeDecl. It is a cache maintained by ASTContext::getTypedefType and + /// ASTContext::getTagDeclType. + Type *TypeForDecl; + friend class ASTContext; +protected: + TypeDecl(Kind DK, SourceLocation L, IdentifierInfo *Id, Decl *PrevDecl) + : Decl(DK, L, Id, PrevDecl), TypeForDecl(0) {} +public: + + // Implement isa/cast/dyncast/etc. 
+ static bool classof(const Decl *D) { + return D->getKind() >= Typedef && D->getKind() <= Enum; + } + static bool classof(const TypeDecl *D) { return true; } +}; + + +class TypedefDecl : public TypeDecl { + /// UnderlyingType - This is the type the typedef is set to. + QualType UnderlyingType; +public: + TypedefDecl(SourceLocation L, IdentifierInfo *Id, QualType T, Decl *PrevDecl) + : TypeDecl(Typedef, L, Id, PrevDecl), UnderlyingType(T) {} + + QualType getUnderlyingType() const { return UnderlyingType; } + void setUnderlyingType(QualType newType) { UnderlyingType = newType; } + + // Implement isa/cast/dyncast/etc. + static bool classof(const Decl *D) { return D->getKind() == Typedef; } + static bool classof(const TypedefDecl *D) { return true; } +}; + + +/// TagDecl - Represents the declaration of a struct/union/class/enum. +class TagDecl : public TypeDecl { + /// IsDefinition - True if this is a definition ("struct foo {};"), false if + /// it is a declaration ("struct foo;"). + bool IsDefinition : 1; +protected: + TagDecl(Kind DK, SourceLocation L, IdentifierInfo *Id, Decl *PrevDecl) + : TypeDecl(DK, L, Id, PrevDecl) { + IsDefinition = false; + } +public: + + /// isDefinition - Return true if this decl has its body specified. + bool isDefinition() const { + return IsDefinition; + } + + const char *getKindName() const { + switch (getKind()) { + default: assert(0 && "Unknown TagDecl!"); + case Struct: return "struct"; + case Union: return "union"; + case Class: return "class"; + case Enum: return "enum"; + } + } + + // Implement isa/cast/dyncast/etc. + static bool classof(const Decl *D) { + return D->getKind() == Struct || D->getKind() == Union || + D->getKind() == Class || D->getKind() == Enum; + } + static bool classof(const TagDecl *D) { return true; } +protected: + void setDefinition(bool V) { IsDefinition = V; } +}; + +/// EnumDecl - Represents an enum. As an extension, we allow forward-declared +/// enums. 
+class EnumDecl : public TagDecl { + /// ElementList - this is a linked list of EnumConstantDecl's which are linked + /// together through their getNextDeclarator pointers. + EnumConstantDecl *ElementList; +public: + EnumDecl(SourceLocation L, IdentifierInfo *Id, Decl *PrevDecl) + : TagDecl(Enum, L, Id, PrevDecl) { + ElementList = 0; + } + + /// defineElements - When created, EnumDecl correspond to a forward declared + /// enum. This method is used to mark the decl as being defined, with the + /// specified list of enums. + void defineElements(EnumConstantDecl *ListHead) { + assert(!isDefinition() && "Cannot redefine enums!"); + ElementList = ListHead; + setDefinition(true); + } + + static bool classof(const Decl *D) { + return D->getKind() == Enum; + } + static bool classof(const EnumDecl *D) { return true; } +}; + + +/// RecordDecl - Represents a struct/union/class. +class RecordDecl : public TagDecl { + /// HasFlexibleArrayMember - This is true if this struct ends with a flexible + /// array member (e.g. int X[]) or if this union contains a struct that does. + /// If so, this cannot be contained in arrays or other structs as a member. + bool HasFlexibleArrayMember : 1; + + /// Members/NumMembers - This is a new[]'d array of pointers to Decls. + FieldDecl **Members; // Null if not defined. + int NumMembers; // -1 if not defined. +public: + RecordDecl(Kind DK, SourceLocation L, IdentifierInfo *Id, Decl *PrevDecl) + : TagDecl(DK, L, Id, PrevDecl) { + HasFlexibleArrayMember = false; + assert(classof(static_cast<Decl*>(this)) && "Invalid Kind!"); + Members = 0; + NumMembers = -1; + } + + bool hasFlexibleArrayMember() const { return HasFlexibleArrayMember; } + void setHasFlexibleArrayMember(bool V) { HasFlexibleArrayMember = V; } + + /// defineBody - When created, RecordDecl's correspond to a forward declared + /// record. This method is used to mark the decl as being defined, with the + /// specified contents. 
+ void defineBody(FieldDecl **Members, unsigned numMembers); + + /// getMember - If the member doesn't exist, or there are no members, this + /// function will return 0; + FieldDecl *getMember(IdentifierInfo *name); + + static bool classof(const Decl *D) { + return D->getKind() == Struct || D->getKind() == Union || + D->getKind() == Class; + } + static bool classof(const RecordDecl *D) { return true; } +}; + +} // end namespace clang + +#endif diff --git a/include/clang/AST/Expr.h b/include/clang/AST/Expr.h new file mode 100644 index 0000000000..fcf179e565 --- /dev/null +++ b/include/clang/AST/Expr.h @@ -0,0 +1,596 @@ +//===--- Expr.h - Classes for representing expressions ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Expr interface and subclasses. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_EXPR_H +#define LLVM_CLANG_AST_EXPR_H + +#include "clang/AST/Stmt.h" +#include "clang/AST/Type.h" +#include "clang/AST/Decl.h" +#include "llvm/ADT/APSInt.h" + +namespace clang { + class IdentifierInfo; + class Decl; + +/// Expr - This represents one expression. Note that Expr's are subclasses of +/// Stmt. This allows an expression to be transparently used any place a Stmt +/// is required. +/// +class Expr : public Stmt { + QualType TR; +protected: + Expr(StmtClass SC, QualType T) : Stmt(SC), TR(T) {} + ~Expr() {} +public: + QualType getType() const { return TR; } + + /// SourceLocation tokens are not useful in isolation - they are low level + /// value objects created/interpreted by SourceManager. We assume AST + /// clients will have a pointer to the respective SourceManager. 
+ virtual SourceRange getSourceRange() const = 0; + SourceLocation getLocStart() const { return getSourceRange().Begin(); } + SourceLocation getLocEnd() const { return getSourceRange().End(); } + + /// getExprLoc - Return the preferred location for the arrow when diagnosing + /// a problem with a generic expression. + virtual SourceLocation getExprLoc() const { return getLocStart(); } + + /// hasLocalSideEffect - Return true if this immediate expression has side + /// effects, not counting any sub-expressions. + bool hasLocalSideEffect() const; + + /// isLvalue - C99 6.3.2.1: an lvalue is an expression with an object type or + /// incomplete type other than void. Nonarray expressions that can be lvalues: + /// - name, where name must be a variable + /// - e[i] + /// - (e), where e must be an lvalue + /// - e.name, where e must be an lvalue + /// - e->name + /// - *e, the type of e cannot be a function type + /// - string-constant + /// + enum isLvalueResult { + LV_Valid, + LV_NotObjectType, + LV_IncompleteVoidType, + LV_InvalidExpression + }; + isLvalueResult isLvalue(); + + /// isModifiableLvalue - C99 6.3.2.1: an lvalue that does not have array type, + /// does not have an incomplete type, does not have a const-qualified type, + /// and if it is a structure or union, does not have any member (including, + /// recursively, any member or element of all contained aggregates or unions) + /// with a const-qualified type. + enum isModifiableLvalueResult { + MLV_Valid, + MLV_NotObjectType, + MLV_IncompleteVoidType, + MLV_InvalidExpression, + MLV_IncompleteType, + MLV_ConstQualified, + MLV_ArrayType + }; + isModifiableLvalueResult isModifiableLvalue(); + + bool isNullPointerConstant() const; + + /// isIntegerConstantExpr - Return true if this expression is a valid integer + /// constant expression, and, if so, return its value in Result. If not a + /// valid i-c-e, return false and fill in Loc (if specified) with the location + /// of the invalid expression. 
+ bool isIntegerConstantExpr(llvm::APSInt &Result, SourceLocation *Loc = 0, + bool isEvaluated = true) const; + bool isIntegerConstantExpr(SourceLocation *Loc = 0) const { + llvm::APSInt X(32); + return isIntegerConstantExpr(X, Loc); + } + + virtual void visit(StmtVisitor &Visitor); + static bool classof(const Stmt *T) { + return T->getStmtClass() >= firstExprConstant && + T->getStmtClass() <= lastExprConstant; + } + static bool classof(const Expr *) { return true; } +}; + +//===----------------------------------------------------------------------===// +// Primary Expressions. +//===----------------------------------------------------------------------===// + +/// DeclRefExpr - [C99 6.5.1p2] - A reference to a declared variable, function, +/// enum, etc. +class DeclRefExpr : public Expr { + Decl *D; // a ValueDecl or EnumConstantDecl + SourceLocation Loc; +public: + DeclRefExpr(Decl *d, QualType t, SourceLocation l) : + Expr(DeclRefExprClass, t), D(d), Loc(l) {} + + Decl *getDecl() { return D; } + const Decl *getDecl() const { return D; } + virtual SourceRange getSourceRange() const { return SourceRange(Loc); } + + + virtual void visit(StmtVisitor &Visitor); + static bool classof(const Stmt *T) { + return T->getStmtClass() == DeclRefExprClass; + } + static bool classof(const DeclRefExpr *) { return true; } +}; + +class IntegerLiteral : public Expr { + llvm::APInt Value; + SourceLocation Loc; +public: + // type should be IntTy, LongTy, LongLongTy, UnsignedIntTy, UnsignedLongTy, + // or UnsignedLongLongTy + IntegerLiteral(const llvm::APInt &V, QualType type, SourceLocation l) + : Expr(IntegerLiteralClass, type), Value(V), Loc(l) { + assert(type->isIntegerType() && "Illegal type in IntegerLiteral"); + } + const llvm::APInt &getValue() const { return Value; } + virtual SourceRange getSourceRange() const { return SourceRange(Loc); } + + virtual void visit(StmtVisitor &Visitor); + static bool classof(const Stmt *T) { + return T->getStmtClass() == IntegerLiteralClass; + 
} + static bool classof(const IntegerLiteral *) { return true; } +}; + +class CharacterLiteral : public Expr { + unsigned Value; + SourceLocation Loc; +public: + // type should be IntTy + CharacterLiteral(unsigned value, QualType type, SourceLocation l) + : Expr(CharacterLiteralClass, type), Value(value), Loc(l) { + } + virtual SourceRange getSourceRange() const { return SourceRange(Loc); } + + unsigned getValue() const { return Value; } + + virtual void visit(StmtVisitor &Visitor); + static bool classof(const Stmt *T) { + return T->getStmtClass() == CharacterLiteralClass; + } + static bool classof(const CharacterLiteral *) { return true; } +}; + +class FloatingLiteral : public Expr { + float Value; // FIXME + SourceLocation Loc; +public: + FloatingLiteral(float value, QualType type, SourceLocation l) + : Expr(FloatingLiteralClass, type), Value(value), Loc(l) {} + + float getValue() const { return Value; } + + virtual SourceRange getSourceRange() const { return SourceRange(Loc); } + + virtual void visit(StmtVisitor &Visitor); + static bool classof(const Stmt *T) { + return T->getStmtClass() == FloatingLiteralClass; + } + static bool classof(const FloatingLiteral *) { return true; } +}; + +class StringLiteral : public Expr { + const char *StrData; + unsigned ByteLength; + bool IsWide; + // if the StringLiteral was composed using token pasting, both locations + // are needed. If not (the common case), firstTokLoc == lastTokLoc. + // FIXME: if space becomes an issue, we should create a sub-class. 
+ SourceLocation firstTokLoc, lastTokLoc; +public: + StringLiteral(const char *strData, unsigned byteLength, bool Wide, + QualType t, SourceLocation b, SourceLocation e); + virtual ~StringLiteral(); + + const char *getStrData() const { return StrData; } + unsigned getByteLength() const { return ByteLength; } + bool isWide() const { return IsWide; } + + virtual SourceRange getSourceRange() const { + return SourceRange(firstTokLoc,lastTokLoc); + } + virtual void visit(StmtVisitor &Visitor); + static bool classof(const Stmt *T) { + return T->getStmtClass() == StringLiteralClass; + } + static bool classof(const StringLiteral *) { return true; } +}; + +/// ParenExpr - This represents a parethesized expression, e.g. "(1)". This +/// AST node is only formed if full location information is requested. +class ParenExpr : public Expr { + SourceLocation L, R; + Expr *Val; +public: + ParenExpr(SourceLocation l, SourceLocation r, Expr *val) + : Expr(ParenExprClass, val->getType()), L(l), R(r), Val(val) {} + + const Expr *getSubExpr() const { return Val; } + Expr *getSubExpr() { return Val; } + SourceRange getSourceRange() const { return SourceRange(L, R); } + + virtual void visit(StmtVisitor &Visitor); + static bool classof(const Stmt *T) { + return T->getStmtClass() == ParenExprClass; + } + static bool classof(const ParenExpr *) { return true; } +}; + + +/// UnaryOperator - This represents the unary-expression's (except sizeof of +/// types), the postinc/postdec operators from postfix-expression, and various +/// extensions. +class UnaryOperator : public Expr { +public: + enum Opcode { + PostInc, PostDec, // [C99 6.5.2.4] Postfix increment and decrement operators + PreInc, PreDec, // [C99 6.5.3.1] Prefix increment and decrement operators. + AddrOf, Deref, // [C99 6.5.3.2] Address and indirection operators. + Plus, Minus, // [C99 6.5.3.3] Unary arithmetic operators. + Not, LNot, // [C99 6.5.3.3] Unary arithmetic operators. 
+ SizeOf, AlignOf, // [C99 6.5.3.4] Sizeof (expr, not type) operator. + Real, Imag, // "__real expr"/"__imag expr" Extension. + Extension // __extension__ marker. + }; +private: + Expr *Val; + Opcode Opc; + SourceLocation Loc; +public: + + UnaryOperator(Expr *input, Opcode opc, QualType type, SourceLocation l) + : Expr(UnaryOperatorClass, type), Val(input), Opc(opc), Loc(l) {} + + Opcode getOpcode() const { return Opc; } + Expr *getSubExpr() const { return Val; } + + /// getOperatorLoc - Return the location of the operator. + SourceLocation getOperatorLoc() const { return Loc; } + + /// isPostfix - Return true if this is a postfix operation, like x++. + static bool isPostfix(Opcode Op); + + bool isPostfix() const { return isPostfix(Opc); } + bool isIncrementDecrementOp() const { return Opc>=PostInc && Opc<=PreDec; } + bool isSizeOfAlignOfOp() const { return Opc == SizeOf || Opc == AlignOf; } + static bool isArithmeticOp(Opcode Op) { return Op >= Plus && Op <= LNot; } + + /// getDecl - a recursive routine that derives the base decl for an + /// expression. For example, it will return the declaration for "s" from + /// the following complex expression "s.zz[2].bb.vv". + static bool isAddressable(Expr *e); + + /// getOpcodeStr - Turn an Opcode enum value into the punctuation char it + /// corresponds to, e.g. "sizeof" or "[pre]++" + static const char *getOpcodeStr(Opcode Op); + + virtual SourceRange getSourceRange() const { + if (isPostfix()) + return SourceRange(Val->getLocStart(), Loc); + else + return SourceRange(Loc, Val->getLocEnd()); + } + virtual SourceLocation getExprLoc() const { return Loc; } + + virtual void visit(StmtVisitor &Visitor); + static bool classof(const Stmt *T) { + return T->getStmtClass() == UnaryOperatorClass; + } + static bool classof(const UnaryOperator *) { return true; } +}; + +/// SizeOfAlignOfTypeExpr - [C99 6.5.3.4] - This is only for sizeof/alignof of +/// *types*. sizeof(expr) is handled by UnaryOperator. 
+class SizeOfAlignOfTypeExpr : public Expr { + bool isSizeof; // true if sizeof, false if alignof. + QualType Ty; + SourceLocation OpLoc, RParenLoc; +public: + SizeOfAlignOfTypeExpr(bool issizeof, QualType argType, QualType resultType, + SourceLocation op, SourceLocation rp) : + Expr(SizeOfAlignOfTypeExprClass, resultType), + isSizeof(issizeof), Ty(argType), OpLoc(op), RParenLoc(rp) {} + + bool isSizeOf() const { return isSizeof; } + QualType getArgumentType() const { return Ty; } + SourceRange getSourceRange() const { return SourceRange(OpLoc, RParenLoc); } + + virtual void visit(StmtVisitor &Visitor); + static bool classof(const Stmt *T) { + return T->getStmtClass() == SizeOfAlignOfTypeExprClass; + } + static bool classof(const SizeOfAlignOfTypeExpr *) { return true; } +}; + +//===----------------------------------------------------------------------===// +// Postfix Operators. +//===----------------------------------------------------------------------===// + +/// ArraySubscriptExpr - [C99 6.5.2.1] Array Subscripting. +class ArraySubscriptExpr : public Expr { + Expr *Base, *Idx; + SourceLocation RBracketLoc; +public: + ArraySubscriptExpr(Expr *base, Expr *idx, QualType t, + SourceLocation rbracketloc) : + Expr(ArraySubscriptExprClass, t), + Base(base), Idx(idx), RBracketLoc(rbracketloc) {} + + Expr *getBase() { return Base; } + const Expr *getBase() const { return Base; } + Expr *getIdx() { return Idx; } + const Expr *getIdx() const { return Idx; } + + SourceRange getSourceRange() const { + return SourceRange(Base->getLocStart(), RBracketLoc); + } + virtual SourceLocation getExprLoc() const { return RBracketLoc; } + + virtual void visit(StmtVisitor &Visitor); + static bool classof(const Stmt *T) { + return T->getStmtClass() == ArraySubscriptExprClass; + } + static bool classof(const ArraySubscriptExpr *) { return true; } +}; + + +/// CallExpr - [C99 6.5.2.2] Function Calls. 
+/// +class CallExpr : public Expr { + Expr *Fn; + Expr **Args; + unsigned NumArgs; + SourceLocation RParenLoc; +public: + CallExpr(Expr *fn, Expr **args, unsigned numargs, QualType t, + SourceLocation rparenloc); + ~CallExpr() { + delete [] Args; + } + + const Expr *getCallee() const { return Fn; } + Expr *getCallee() { return Fn; } + + /// getNumArgs - Return the number of actual arguments to this call. + /// + unsigned getNumArgs() const { return NumArgs; } + + /// getArg - Return the specified argument. + Expr *getArg(unsigned Arg) { + assert(Arg < NumArgs && "Arg access out of range!"); + return Args[Arg]; + } + const Expr *getArg(unsigned Arg) const { + assert(Arg < NumArgs && "Arg access out of range!"); + return Args[Arg]; + } + + /// getNumCommas - Return the number of commas that must have been present in + /// this function call. + unsigned getNumCommas() const { return NumArgs ? NumArgs - 1 : 0; } + + SourceRange getSourceRange() const { + return SourceRange(Fn->getLocStart(), RParenLoc); + } + + virtual void visit(StmtVisitor &Visitor); + static bool classof(const Stmt *T) { + return T->getStmtClass() == CallExprClass; + } + static bool classof(const CallExpr *) { return true; } +}; + +/// MemberExpr - [C99 6.5.2.3] Structure and Union Members. +/// +class MemberExpr : public Expr { + Expr *Base; + FieldDecl *MemberDecl; + SourceLocation MemberLoc; + bool IsArrow; // True if this is "X->F", false if this is "X.F". 
+public: + MemberExpr(Expr *base, bool isarrow, FieldDecl *memberdecl, SourceLocation l) + : Expr(MemberExprClass, memberdecl->getType()), + Base(base), MemberDecl(memberdecl), MemberLoc(l), IsArrow(isarrow) {} + + Expr *getBase() const { return Base; } + FieldDecl *getMemberDecl() const { return MemberDecl; } + bool isArrow() const { return IsArrow; } + + virtual SourceRange getSourceRange() const { + return SourceRange(getBase()->getLocStart(), MemberLoc); + } + virtual SourceLocation getExprLoc() const { return MemberLoc; } + + virtual void visit(StmtVisitor &Visitor); + static bool classof(const Stmt *T) { + return T->getStmtClass() == MemberExprClass; + } + static bool classof(const MemberExpr *) { return true; } +}; + +/// CastExpr - [C99 6.5.4] Cast Operators. +/// +class CastExpr : public Expr { + QualType Ty; + Expr *Op; + SourceLocation Loc; // the location of the left paren +public: + CastExpr(QualType ty, Expr *op, SourceLocation l) : + Expr(CastExprClass, ty), Ty(ty), Op(op), Loc(l) {} + CastExpr(StmtClass SC, QualType ty, Expr *op) : + Expr(SC, QualType()), Ty(ty), Op(op), Loc(SourceLocation()) {} + + SourceLocation getLParenLoc() const { return Loc; } + + QualType getDestType() const { return Ty; } + Expr *getSubExpr() const { return Op; } + + virtual SourceRange getSourceRange() const { + return SourceRange(Loc, getSubExpr()->getSourceRange().End()); + } + virtual void visit(StmtVisitor &Visitor); + static bool classof(const Stmt *T) { + return T->getStmtClass() == CastExprClass; + } + static bool classof(const CastExpr *) { return true; } +}; + + +class BinaryOperator : public Expr { +public: + enum Opcode { + // Operators listed in order of precedence. + Mul, Div, Rem, // [C99 6.5.5] Multiplicative operators. + Add, Sub, // [C99 6.5.6] Additive operators. + Shl, Shr, // [C99 6.5.7] Bitwise shift operators. + LT, GT, LE, GE, // [C99 6.5.8] Relational operators. + EQ, NE, // [C99 6.5.9] Equality operators. 
+ And, // [C99 6.5.10] Bitwise AND operator. + Xor, // [C99 6.5.11] Bitwise XOR operator. + Or, // [C99 6.5.12] Bitwise OR operator. + LAnd, // [C99 6.5.13] Logical AND operator. + LOr, // [C99 6.5.14] Logical OR operator. + Assign, MulAssign,// [C99 6.5.16] Assignment operators. + DivAssign, RemAssign, + AddAssign, SubAssign, + ShlAssign, ShrAssign, + AndAssign, XorAssign, + OrAssign, + Comma // [C99 6.5.17] Comma operator. + }; + + BinaryOperator(Expr *lhs, Expr *rhs, Opcode opc, QualType ResTy) + : Expr(BinaryOperatorClass, ResTy), LHS(lhs), RHS(rhs), Opc(opc) { + assert(!isCompoundAssignmentOp() && + "Use ArithAssignBinaryOperator for compound assignments"); + } + + Opcode getOpcode() const { return Opc; } + Expr *getLHS() const { return LHS; } + Expr *getRHS() const { return RHS; } + virtual SourceRange getSourceRange() const { + return SourceRange(getLHS()->getLocStart(), getRHS()->getLocEnd()); + } + + /// getOpcodeStr - Turn an Opcode enum value into the punctuation char it + /// corresponds to, e.g. "<<=". + static const char *getOpcodeStr(Opcode Op); + + /// predicates to categorize the respective opcodes. 
+ bool isMultiplicativeOp() const { return Opc >= Mul && Opc <= Rem; } + bool isAdditiveOp() const { return Opc == Add || Opc == Sub; } + bool isShiftOp() const { return Opc == Shl || Opc == Shr; } + bool isBitwiseOp() const { return Opc >= And && Opc <= Or; } + bool isRelationalOp() const { return Opc >= LT && Opc <= GE; } + bool isEqualityOp() const { return Opc == EQ || Opc == NE; } + bool isLogicalOp() const { return Opc == LAnd || Opc == LOr; } + bool isAssignmentOp() const { return Opc >= Assign && Opc <= OrAssign; } + bool isCompoundAssignmentOp() const { return Opc > Assign && Opc <= OrAssign;} + bool isShiftAssignOp() const { return Opc == ShlAssign || Opc == ShrAssign; } + + virtual void visit(StmtVisitor &Visitor); + static bool classof(const Stmt *T) { + return T->getStmtClass() == BinaryOperatorClass; + } + static bool classof(const BinaryOperator *) { return true; } +private: + Expr *LHS, *RHS; + Opcode Opc; +protected: + BinaryOperator(Expr *lhs, Expr *rhs, Opcode opc, QualType ResTy, bool dead) + : Expr(BinaryOperatorClass, ResTy), LHS(lhs), RHS(rhs), Opc(opc) { + } +}; + +/// CompoundAssignOperator - For compound assignments (e.g. +=), we keep +/// track of the type the operation is performed in. Due to the semantics of +/// these operators, the operands are promoted, the aritmetic performed, an +/// implicit conversion back to the result type done, then the assignment takes +/// place. This captures the intermediate type which the computation is done +/// in. 
+class CompoundAssignOperator : public BinaryOperator { + QualType ComputationType; +public: + CompoundAssignOperator(Expr *lhs, Expr *rhs, Opcode opc, + QualType ResType, QualType CompType) + : BinaryOperator(lhs, rhs, opc, ResType, true), ComputationType(CompType) { + assert(isCompoundAssignmentOp() && + "Only should be used for compound assignments"); + } + + QualType getComputationType() const { return ComputationType; } + + static bool classof(const CompoundAssignOperator *) { return true; } + static bool classof(const BinaryOperator *B) { + return B->isCompoundAssignmentOp(); + } + static bool classof(const Stmt *S) { + return isa<BinaryOperator>(S) && classof(cast<BinaryOperator>(S)); + } +}; + +/// ConditionalOperator - The ?: operator. Note that LHS may be null when the +/// GNU "missing LHS" extension is in use. +/// +class ConditionalOperator : public Expr { + Expr *Cond, *LHS, *RHS; // Left/Middle/Right hand sides. +public: + ConditionalOperator(Expr *cond, Expr *lhs, Expr *rhs, QualType t) + : Expr(ConditionalOperatorClass, t), Cond(cond), LHS(lhs), RHS(rhs) {} + + Expr *getCond() const { return Cond; } + Expr *getLHS() const { return LHS; } + Expr *getRHS() const { return RHS; } + + virtual SourceRange getSourceRange() const { + return SourceRange(getCond()->getLocStart(), getRHS()->getLocEnd()); + } + virtual void visit(StmtVisitor &Visitor); + static bool classof(const Stmt *T) { + return T->getStmtClass() == ConditionalOperatorClass; + } + static bool classof(const ConditionalOperator *) { return true; } +}; + +/// AddrLabel - The GNU address of label extension, representing &&label. 
+class AddrLabel : public Expr { + SourceLocation AmpAmpLoc, LabelLoc; + LabelStmt *Label; +public: + AddrLabel(SourceLocation AALoc, SourceLocation LLoc, LabelStmt *L, QualType t) + : Expr(AddrLabelClass, t), AmpAmpLoc(AALoc), LabelLoc(LLoc), Label(L) {} + + virtual SourceRange getSourceRange() const { + return SourceRange(AmpAmpLoc, LabelLoc); + } + + LabelStmt *getLabel() const { return Label; } + + virtual void visit(StmtVisitor &Visitor); + static bool classof(const Stmt *T) { + return T->getStmtClass() == AddrLabelClass; + } + static bool classof(const AddrLabel *) { return true; } +}; + +} // end namespace clang + +#endif diff --git a/include/clang/AST/ExprCXX.h b/include/clang/AST/ExprCXX.h new file mode 100644 index 0000000000..cfffead2ab --- /dev/null +++ b/include/clang/AST/ExprCXX.h @@ -0,0 +1,81 @@ +//===--- ExprCXX.h - Classes for representing expressions -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Bill Wendling and is distributed under the +// University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Expr interface and subclasses for C++ expressions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_EXPRCXX_H +#define LLVM_CLANG_AST_EXPRCXX_H + +#include "clang/AST/Expr.h" + +namespace clang { + + //===--------------------------------------------------------------------===// + // C++ Expressions. + //===--------------------------------------------------------------------===// + + /// CXXCastExpr - [C++ 5.2.7, 5.2.9, 5.2.10, 5.2.11] C++ Cast Operators. 
+ /// + class CXXCastExpr : public Expr { + public: + enum Opcode { + DynamicCast, + StaticCast, + ReinterpretCast, + ConstCast + }; + private: + QualType Ty; + Opcode Opc; + Expr *Op; + SourceLocation Loc; // the location of the casting op + public: + CXXCastExpr(Opcode op, QualType ty, Expr *expr, SourceLocation l) + : Expr(CXXCastExprClass, ty), Ty(ty), Opc(op), Op(expr), Loc(l) {} + + QualType getDestType() const { return Ty; } + Expr *getSubExpr() const { return Op; } + + Opcode getOpcode() const { return Opc; } + + virtual SourceRange getSourceRange() const { + return SourceRange(Loc, getSubExpr()->getSourceRange().End()); + } + virtual void visit(StmtVisitor &Visitor); + static bool classof(const Stmt *T) { + return T->getStmtClass() == CXXCastExprClass; + } + static bool classof(const CXXCastExpr *) { return true; } + }; + + /// CXXBoolLiteralExpr - [C++ 2.13.5] C++ Boolean Literal. + /// + class CXXBoolLiteralExpr : public Expr { + bool Value; + SourceLocation Loc; + public: + CXXBoolLiteralExpr(bool val, SourceLocation l) : + Expr(CXXBoolLiteralExprClass, QualType()), Value(val), Loc(l) {} + + bool getValue() const { return Value; } + + virtual SourceRange getSourceRange() const { return SourceRange(Loc); } + + virtual void visit(StmtVisitor &Visitor); + static bool classof(const Stmt *T) { + return T->getStmtClass() == CXXBoolLiteralExprClass; + } + static bool classof(const CXXBoolLiteralExpr *) { return true; } + }; + +} // end namespace clang + +#endif diff --git a/include/clang/AST/Stmt.h b/include/clang/AST/Stmt.h new file mode 100644 index 0000000000..f3fb907935 --- /dev/null +++ b/include/clang/AST/Stmt.h @@ -0,0 +1,378 @@ +//===--- Stmt.h - Classes for representing statements -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines the Stmt interface and subclasses. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_STMT_H +#define LLVM_CLANG_AST_STMT_H + +#include "clang/Basic/SourceLocation.h" +#include "llvm/ADT/SmallVector.h" +#include <iosfwd> + +namespace clang { + class Expr; + class Decl; + class IdentifierInfo; + class StmtVisitor; + +/// Stmt - This represents one statement. +/// +class Stmt { +public: + enum StmtClass { +#define STMT(N, CLASS, PARENT) CLASS##Class = N, +#define FIRST_STMT(N) firstStmtConstant = N, +#define LAST_STMT(N) lastStmtConstant = N, +#define FIRST_EXPR(N) firstExprConstant = N, +#define LAST_EXPR(N) lastExprConstant = N +#include "clang/AST/StmtNodes.def" +}; +private: + const StmtClass sClass; +public: + Stmt(StmtClass SC) : sClass(SC) { + if (Stmt::CollectingStats()) Stmt::addStmtClass(SC); + } + virtual ~Stmt() {} + + StmtClass getStmtClass() const { return sClass; } + const char *getStmtClassName() const; + + // global temp stats (until we have a per-module visitor) + static void addStmtClass(const StmtClass s); + static bool CollectingStats(bool enable=false); + static void PrintStats(); + + void dump() const; + void print(std::ostream &OS) const; + + // Implement visitor support. + virtual void visit(StmtVisitor &Visitor); + + // Implement isa<T> support. + static bool classof(const Stmt *) { return true; } +}; + +/// DeclStmt - Adaptor class for mixing declarations with statements and +/// expressions. For example, CompoundStmt mixes statements, expressions +/// and declarations (variables, types). Another example is ForStmt, where +/// the first statement can be an expression or a declaration. 
+/// +class DeclStmt : public Stmt { + Decl *TheDecl; +public: + DeclStmt(Decl *D) : Stmt(DeclStmtClass), TheDecl(D) {} + + const Decl *getDecl() const { return TheDecl; } + Decl *getDecl() { return TheDecl; } + + virtual void visit(StmtVisitor &Visitor); + static bool classof(const Stmt *T) { + return T->getStmtClass() == DeclStmtClass; + } + static bool classof(const DeclStmt *) { return true; } +}; + +/// NullStmt - This is the null statement ";": C99 6.8.3p3. +/// +class NullStmt : public Stmt { + SourceLocation SemiLoc; +public: + NullStmt(SourceLocation L) : Stmt(NullStmtClass), SemiLoc(L) {} + + SourceLocation getSemiLoc() const { return SemiLoc; } + + virtual void visit(StmtVisitor &Visitor); + static bool classof(const Stmt *T) { + return T->getStmtClass() == NullStmtClass; + } + static bool classof(const NullStmt *) { return true; } +}; + +/// CompoundStmt - This represents a group of statements like { stmt stmt }. +/// +class CompoundStmt : public Stmt { + llvm::SmallVector<Stmt*, 16> Body; +public: + CompoundStmt(Stmt **StmtStart, unsigned NumStmts) + : Stmt(CompoundStmtClass), Body(StmtStart, StmtStart+NumStmts) {} + + typedef llvm::SmallVector<Stmt*, 16>::iterator body_iterator; + body_iterator body_begin() { return Body.begin(); } + body_iterator body_end() { return Body.end(); } + + typedef llvm::SmallVector<Stmt*, 16>::const_iterator const_body_iterator; + const_body_iterator body_begin() const { return Body.begin(); } + const_body_iterator body_end() const { return Body.end(); } + + void push_back(Stmt *S) { Body.push_back(S); } + + virtual void visit(StmtVisitor &Visitor); + static bool classof(const Stmt *T) { + return T->getStmtClass() == CompoundStmtClass; + } + static bool classof(const CompoundStmt *) { return true; } +}; + +class CaseStmt : public Stmt { + Expr *LHSVal; + Expr *RHSVal; // Non-null for GNU "case 1 ... 
4" extension + Stmt *SubStmt; +public: + CaseStmt(Expr *lhs, Expr *rhs, Stmt *substmt) + : Stmt(CaseStmtClass), LHSVal(lhs), RHSVal(rhs), SubStmt(substmt) {} + + Expr *getLHS() { return LHSVal; } + Expr *getRHS() { return RHSVal; } + Stmt *getSubStmt() { return SubStmt; } + + virtual void visit(StmtVisitor &Visitor); + static bool classof(const Stmt *T) { + return T->getStmtClass() == CaseStmtClass; + } + static bool classof(const CaseStmt *) { return true; } +}; + +class DefaultStmt : public Stmt { + Stmt *SubStmt; +public: + DefaultStmt(Stmt *substmt) : Stmt(DefaultStmtClass), SubStmt(substmt) {} + + Stmt *getSubStmt() { return SubStmt; } + + virtual void visit(StmtVisitor &Visitor); + static bool classof(const Stmt *T) { + return T->getStmtClass() == DefaultStmtClass; + } + static bool classof(const DefaultStmt *) { return true; } +}; + +class LabelStmt : public Stmt { + SourceLocation IdentLoc; + IdentifierInfo *Label; + Stmt *SubStmt; +public: + LabelStmt(SourceLocation IL, IdentifierInfo *label, Stmt *substmt) + : Stmt(LabelStmtClass), IdentLoc(IL), Label(label), SubStmt(substmt) {} + + SourceLocation getIdentLoc() const { return IdentLoc; } + IdentifierInfo *getID() const { return Label; } + const char *getName() const; + Stmt *getSubStmt() { return SubStmt; } + const Stmt *getSubStmt() const { return SubStmt; } + + void setIdentLoc(SourceLocation L) { IdentLoc = L; } + void setSubStmt(Stmt *SS) { SubStmt = SS; } + + virtual void visit(StmtVisitor &Visitor); + static bool classof(const Stmt *T) { + return T->getStmtClass() == LabelStmtClass; + } + static bool classof(const LabelStmt *) { return true; } +}; + + +/// IfStmt - This represents an if/then/else. 
+/// +class IfStmt : public Stmt { + Expr *Cond; + Stmt *Then, *Else; +public: + IfStmt(Expr *cond, Stmt *then, Stmt *elsev = 0) + : Stmt(IfStmtClass), Cond(cond), Then(then), Else(elsev) {} + + const Expr *getCond() const { return Cond; } + const Stmt *getThen() const { return Then; } + const Stmt *getElse() const { return Else; } + + Expr *getCond() { return Cond; } + Stmt *getThen() { return Then; } + Stmt *getElse() { return Else; } + + virtual void visit(StmtVisitor &Visitor); + static bool classof(const Stmt *T) { + return T->getStmtClass() == IfStmtClass; + } + static bool classof(const IfStmt *) { return true; } +}; + +/// SwitchStmt - This represents a 'switch' stmt. +/// +class SwitchStmt : public Stmt { + Expr *Cond; + Stmt *Body; +public: + SwitchStmt(Expr *cond, Stmt *body) + : Stmt(SwitchStmtClass), Cond(cond), Body(body) {} + + Expr *getCond() { return Cond; } + Stmt *getBody() { return Body; } + + virtual void visit(StmtVisitor &Visitor); + static bool classof(const Stmt *T) { + return T->getStmtClass() == SwitchStmtClass; + } + static bool classof(const SwitchStmt *) { return true; } +}; + + +/// WhileStmt - This represents a 'while' stmt. +/// +class WhileStmt : public Stmt { + Expr *Cond; + Stmt *Body; +public: + WhileStmt(Expr *cond, Stmt *body) + : Stmt(WhileStmtClass), Cond(cond), Body(body) {} + + Expr *getCond() { return Cond; } + const Expr *getCond() const { return Cond; } + Stmt *getBody() { return Body; } + const Stmt *getBody() const { return Body; } + + virtual void visit(StmtVisitor &Visitor); + static bool classof(const Stmt *T) { + return T->getStmtClass() == WhileStmtClass; + } + static bool classof(const WhileStmt *) { return true; } +}; + +/// DoStmt - This represents a 'do/while' stmt. 
+/// +class DoStmt : public Stmt { + Stmt *Body; + Expr *Cond; +public: + DoStmt(Stmt *body, Expr *cond) + : Stmt(DoStmtClass), Body(body), Cond(cond) {} + + Stmt *getBody() { return Body; } + const Stmt *getBody() const { return Body; } + Expr *getCond() { return Cond; } + const Expr *getCond() const { return Cond; } + + virtual void visit(StmtVisitor &Visitor); + static bool classof(const Stmt *T) { + return T->getStmtClass() == DoStmtClass; + } + static bool classof(const DoStmt *) { return true; } +}; + + +/// ForStmt - This represents a 'for (init;cond;inc)' stmt. Note that any of +/// the init/cond/inc parts of the ForStmt will be null if they were not +/// specified in the source. +/// +class ForStmt : public Stmt { + Stmt *Init; // Expression or declstmt. + Expr *Cond, *Inc; + Stmt *Body; +public: + ForStmt(Stmt *init, Expr *cond, Expr *inc, Stmt *body) + : Stmt(ForStmtClass), Init(init), Cond(cond), Inc(inc), Body(body) {} + + Stmt *getInit() { return Init; } + Expr *getCond() { return Cond; } + Expr *getInc() { return Inc; } + Stmt *getBody() { return Body; } + + const Stmt *getInit() const { return Init; } + const Expr *getCond() const { return Cond; } + const Expr *getInc() const { return Inc; } + const Stmt *getBody() const { return Body; } + + virtual void visit(StmtVisitor &Visitor); + static bool classof(const Stmt *T) { + return T->getStmtClass() == ForStmtClass; + } + static bool classof(const ForStmt *) { return true; } +}; + +/// GotoStmt - This represents a direct goto. +/// +class GotoStmt : public Stmt { + LabelStmt *Label; +public: + GotoStmt(LabelStmt *label) : Stmt(GotoStmtClass), Label(label) {} + + LabelStmt *getLabel() const { return Label; } + + virtual void visit(StmtVisitor &Visitor); + static bool classof(const Stmt *T) { + return T->getStmtClass() == GotoStmtClass; + } + static bool classof(const GotoStmt *) { return true; } +}; + +/// IndirectGotoStmt - This represents an indirect goto. 
+/// +class IndirectGotoStmt : public Stmt { + Expr *Target; +public: + IndirectGotoStmt(Expr *target) : Stmt(IndirectGotoStmtClass), + Target(target) {} + + Expr *getTarget() { return Target; } + + virtual void visit(StmtVisitor &Visitor); + static bool classof(const Stmt *T) { + return T->getStmtClass() == IndirectGotoStmtClass; + } + static bool classof(const IndirectGotoStmt *) { return true; } +}; + + +/// ContinueStmt - This represents a continue. +/// +class ContinueStmt : public Stmt { +public: + ContinueStmt() : Stmt(ContinueStmtClass) {} + virtual void visit(StmtVisitor &Visitor); + static bool classof(const Stmt *T) { + return T->getStmtClass() == ContinueStmtClass; + } + static bool classof(const ContinueStmt *) { return true; } +}; + +/// BreakStmt - This represents a break. +/// +class BreakStmt : public Stmt { +public: + BreakStmt() : Stmt(BreakStmtClass) {} + virtual void visit(StmtVisitor &Visitor); + static bool classof(const Stmt *T) { + return T->getStmtClass() == BreakStmtClass; + } + static bool classof(const BreakStmt *) { return true; } +}; + + +/// ReturnStmt - This represents a return, optionally of an expression. 
///
class ReturnStmt : public Stmt {
  // The returned expression; the constructor defaults it to null.
  Expr *RetExpr;
public:
  ReturnStmt(Expr *E = 0) : Stmt(ReturnStmtClass), RetExpr(E) {}

  const Expr *getRetValue() const { return RetExpr; }
  Expr *getRetValue() { return RetExpr; }

  virtual void visit(StmtVisitor &Visitor);

  // isa<T>/dyn_cast<T> support.
  static bool classof(const Stmt *T) {
    return T->getStmtClass() == ReturnStmtClass;
  }
  static bool classof(const ReturnStmt *) { return true; }
};

}  // end namespace clang

#endif
diff --git a/include/clang/AST/StmtNodes.def b/include/clang/AST/StmtNodes.def new file mode 100644 index 0000000000..6595223a0c --- /dev/null +++ b/include/clang/AST/StmtNodes.def @@ -0,0 +1,74 @@
//===-- StmtNodes.def - Metadata about Stmt AST nodes -----------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//  This file defines the AST Node info database.
//
//===----------------------------------------------------------------------===//

// Usage: define STMT(N, CLASS, PARENT) before including this file; it has no
// default here.  The FIRST_*/LAST_* range markers are optional and expand to
// nothing unless the includer supplies them.  Each pair is guarded by its
// FIRST_* macro only, so supply both macros of a pair or neither.
#ifndef FIRST_STMT
#define FIRST_STMT(n)
#define LAST_STMT(n)
#endif

#ifndef FIRST_EXPR
#define FIRST_EXPR(n)
#define LAST_EXPR(n)
#endif

// Normal Statements.
FIRST_STMT(1)
STMT( 1, NullStmt        , Stmt)
STMT( 2, CompoundStmt    , Stmt)
STMT( 3, CaseStmt        , Stmt)
STMT( 4, DefaultStmt     , Stmt)
STMT( 5, LabelStmt       , Stmt)
STMT( 6, IfStmt          , Stmt)
STMT( 7, SwitchStmt      , Stmt)
STMT( 8, WhileStmt       , Stmt)
STMT( 9, DoStmt          , Stmt)
STMT(10, ForStmt         , Stmt)
STMT(11, GotoStmt        , Stmt)
STMT(12, IndirectGotoStmt, Stmt)
STMT(13, ContinueStmt    , Stmt)
STMT(14, BreakStmt       , Stmt)
STMT(15, ReturnStmt      , Stmt)
STMT(16, DeclStmt        , Stmt)
LAST_STMT(16)

// Expression numbering starts at 32, leaving 17..31 unused.
FIRST_EXPR(32)
// Expressions.
+STMT(32, Expr , Stmt) +STMT(33, DeclRefExpr , Expr) +STMT(34, IntegerLiteral , Expr) +STMT(35, FloatingLiteral , Expr) +STMT(36, StringLiteral , Expr) +STMT(37, CharacterLiteral , Expr) +STMT(38, ParenExpr , Expr) +STMT(39, UnaryOperator , Expr) +STMT(40, SizeOfAlignOfTypeExpr, Expr) +STMT(41, ArraySubscriptExpr , Expr) +STMT(42, CallExpr , Expr) +STMT(43, MemberExpr , Expr) +STMT(44, CastExpr , Expr) +STMT(45, BinaryOperator , Expr) +STMT(46, ConditionalOperator , Expr) + +// GNU Extensions. +STMT(47, AddrLabel , Expr) + +// C++ Expressions. +STMT(48, CXXCastExpr , Expr) +STMT(49, CXXBoolLiteralExpr , Expr) +LAST_EXPR(49) + +#undef STMT +#undef FIRST_STMT +#undef LAST_STMT +#undef FIRST_EXPR +#undef LAST_EXPR diff --git a/include/clang/AST/StmtVisitor.h b/include/clang/AST/StmtVisitor.h new file mode 100644 index 0000000000..9422765e8c --- /dev/null +++ b/include/clang/AST/StmtVisitor.h @@ -0,0 +1,40 @@ +//===--- StmtVisitor.h - Visitor for Stmt subclasses ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the StmtVisitor interface. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_STMTVISITOR_H +#define LLVM_CLANG_AST_STMTVISITOR_H + +namespace clang { + class Stmt; + // Add prototypes for all AST node classes. +#define STMT(N, CLASS, PARENT) \ + class CLASS; +#include "clang/AST/StmtNodes.def" + +/// StmtVisitor - This class implements a simple visitor for Stmt subclasses. +/// Since Expr derives from Stmt, this also includes support for visiting Exprs. +class StmtVisitor { +public: + virtual ~StmtVisitor(); + + virtual void VisitStmt(Stmt *Node) {} + + // Implement all the methods with the StmtNodes.def file. 
+#define STMT(N, CLASS, PARENT) \ + virtual void Visit##CLASS(CLASS *Node); +#include "clang/AST/StmtNodes.def" +}; + +} + +#endif diff --git a/include/clang/AST/Type.h b/include/clang/AST/Type.h new file mode 100644 index 0000000000..93969b2684 --- /dev/null +++ b/include/clang/AST/Type.h @@ -0,0 +1,648 @@ +//===--- Type.h - C Language Family Type Representation ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Type interface and subclasses. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_TYPE_H +#define LLVM_CLANG_AST_TYPE_H + +#include "llvm/Support/Casting.h" +#include "llvm/ADT/FoldingSet.h" + +using llvm::isa; +using llvm::cast; +using llvm::cast_or_null; +using llvm::dyn_cast; +using llvm::dyn_cast_or_null; + +namespace clang { + class ASTContext; + class Type; + class TypedefDecl; + class TagDecl; + class RecordDecl; + class EnumDecl; + class Expr; + class SourceLocation; + +/// QualType - For efficiency, we don't store CVR-qualified types as nodes on +/// their own: instead each reference to a type stores the qualifiers. This +/// greatly reduces the number of nodes we need to allocate for types (for +/// example we only need one for 'int', 'const int', 'volatile int', +/// 'const volatile int', etc). +/// +/// As an added efficiency bonus, instead of making this a pair, we just store +/// the three bits we care about in the low bits of the pointer. To handle the +/// packing/unpacking, we make QualType be a simple wrapper class that acts like +/// a smart pointer. +class QualType { + uintptr_t ThePtr; +public: + enum TQ { // NOTE: These flags must be kept in sync with DeclSpec::TQ. 
+ Const = 0x1, + Restrict = 0x2, + Volatile = 0x4, + CVRFlags = Const|Restrict|Volatile + }; + + QualType() : ThePtr(0) {} + + QualType(Type *Ptr, unsigned Quals) { + assert((Quals & ~CVRFlags) == 0 && "Invalid type qualifiers!"); + ThePtr = reinterpret_cast<uintptr_t>(Ptr); + assert((ThePtr & CVRFlags) == 0 && "Type pointer not 8-byte aligned?"); + ThePtr |= Quals; + } + + static QualType getFromOpaquePtr(void *Ptr) { + QualType T; + T.ThePtr = reinterpret_cast<uintptr_t>(Ptr); + return T; + } + + unsigned getQualifiers() const { + return ThePtr & CVRFlags; + } + Type *getTypePtr() const { + return reinterpret_cast<Type*>(ThePtr & ~CVRFlags); + } + + void *getAsOpaquePtr() const { + return reinterpret_cast<void*>(ThePtr); + } + + Type &operator*() const { + return *getTypePtr(); + } + + Type *operator->() const { + return getTypePtr(); + } + + /// isNull - Return true if this QualType doesn't point to a type yet. + bool isNull() const { + return ThePtr == 0; + } + + bool isConstQualified() const { + return ThePtr & Const; + } + bool isVolatileQualified() const { + return ThePtr & Volatile; + } + bool isRestrictQualified() const { + return ThePtr & Restrict; + } + + QualType getQualifiedType(unsigned TQs) const { + return QualType(getTypePtr(), TQs); + } + + QualType getUnqualifiedType() const { + return QualType(getTypePtr(), 0); + } + + /// operator==/!= - Indicate whether the specified types and qualifiers are + /// identical. + bool operator==(const QualType &RHS) const { + return ThePtr == RHS.ThePtr; + } + bool operator!=(const QualType &RHS) const { + return ThePtr != RHS.ThePtr; + } + std::string getAsString() const { + std::string S; + getAsStringInternal(S); + return S; + } + void getAsStringInternal(std::string &Str) const; + + void dump(const char *s = 0) const; + + /// getCanonicalType - Return the canonical version of this type, with the + /// appropriate type qualifiers on it. 
+ inline QualType getCanonicalType() const; + +private: +}; + +} // end clang. + +namespace llvm { +/// Implement simplify_type for QualType, so that we can dyn_cast from QualType +/// to a specific Type class. +template<> struct simplify_type<const ::clang::QualType> { + typedef ::clang::Type* SimpleType; + static SimpleType getSimplifiedValue(const ::clang::QualType &Val) { + return Val.getTypePtr(); + } +}; +template<> struct simplify_type< ::clang::QualType> + : public simplify_type<const ::clang::QualType> {}; +} + +namespace clang { + +/// Type - This is the base class of the type hierarchy. A central concept +/// with types is that each type always has a canonical type. A canonical type +/// is the type with any typedef names stripped out of it or the types it +/// references. For example, consider: +/// +/// typedef int foo; +/// typedef foo* bar; +/// 'int *' 'foo *' 'bar' +/// +/// There will be a Type object created for 'int'. Since int is canonical, its +/// canonicaltype pointer points to itself. There is also a Type for 'foo' (a +/// TypeNameType). Its CanonicalType pointer points to the 'int' Type. Next +/// there is a PointerType that represents 'int*', which, like 'int', is +/// canonical. Finally, there is a PointerType type for 'foo*' whose canonical +/// type is 'int*', and there is a TypeNameType for 'bar', whose canonical type +/// is also 'int*'. +/// +/// Non-canonical types are useful for emitting diagnostics, without losing +/// information about typedefs being used. Canonical types are useful for type +/// comparisons (they allow by-pointer equality tests) and useful for reasoning +/// about whether something has a particular form (e.g. is a function type), +/// because they implicitly, recursively, strip all typedefs out of a type. +/// +/// Types, once created, are immutable. 
+/// +class Type { +public: + enum TypeClass { + Builtin, Complex, Pointer, Reference, Array, Vector, + FunctionNoProto, FunctionProto, + TypeName, Tagged + }; +private: + QualType CanonicalType; + + /// TypeClass bitfield - Enum that specifies what subclass this belongs to. + /// Note that this should stay at the end of the ivars for Type so that + /// subclasses can pack their bitfields into the same word. + TypeClass TC : 4; +protected: + Type(TypeClass tc, QualType Canonical) + : CanonicalType(Canonical.isNull() ? QualType(this,0) : Canonical), TC(tc){} + virtual ~Type(); + friend class ASTContext; +public: + TypeClass getTypeClass() const { return TC; } + + bool isCanonical() const { return CanonicalType.getTypePtr() == this; } + + /// Types are partitioned into 3 broad categories (C99 6.2.5p1): + /// object types, function types, and incomplete types. + + /// isObjectType - types that fully describe objects. An object is a region + /// of memory that can be examined and stored into (H&S). + bool isObjectType() const; + + /// isFunctionType - types that describe functions. + bool isFunctionType() const; + + /// isIncompleteType - Return true if this is an incomplete type. + /// A type that can describe objects, but which lacks information needed to + /// determine its size (e.g. void, or a fwd declared struct). Clients of this + /// routine will need to determine if the size is actually required. + bool isIncompleteType() const; + + /// Helper methods to distinguish type categories. All type predicates + /// operate on the canonical type, ignoring typedefs. + bool isIntegerType() const; // C99 6.2.5p17 (int, char, bool, enum) + + /// Floating point categories. 
+ bool isRealFloatingType() const; // C99 6.2.5p10 (float, double, long double) + bool isComplexType() const; // C99 6.2.5p11 (complex) + bool isFloatingType() const; // C99 6.2.5p11 (real floating + complex) + bool isRealType() const; // C99 6.2.5p17 (real floating + integer) + bool isArithmeticType() const; // C99 6.2.5p18 (integer + floating) + + /// Vector types + bool isVectorType() const; // GCC vector type. + + /// Derived types (C99 6.2.5p20). isFunctionType() is also a derived type. + bool isDerivedType() const; + bool isPointerType() const; + bool isReferenceType() const; + bool isArrayType() const; + bool isStructureType() const; + bool isUnionType() const; + + bool isVoidType() const; // C99 6.2.5p19 + bool isScalarType() const; // C99 6.2.5p21 (arithmetic + pointers) + bool isAggregateType() const; // C99 6.2.5p21 (arrays, structures) + + /// More type predicates useful for type checking/promotion + bool isPromotableIntegerType() const; // C99 6.3.1.1p2 + + /// isSignedIntegerType - Return true if this is an integer type that is + /// signed, according to C99 6.2.5p4. + bool isSignedIntegerType() const; + + /// isUnsignedIntegerType - Return true if this is an integer type that is + /// unsigned, according to C99 6.2.5p6. Note that this returns true for _Bool. + bool isUnsignedIntegerType() const; + + /// isConstantSizeType - Return true if this is not a variable sized type, + /// according to the rules of C99 6.7.5p3. If Loc is non-null, it is set to + /// the location of the subexpression that makes it a vla type. It is not + /// legal to call this on incomplete types. + bool isConstantSizeType(SourceLocation *Loc = 0) const; + + /// Compatibility predicates used to check assignment expressions. 
+ static bool typesAreCompatible(QualType, QualType); // C99 6.2.7p1 + static bool tagTypesAreCompatible(QualType, QualType); // C99 6.2.7p1 + static bool pointerTypesAreCompatible(QualType, QualType); // C99 6.7.5.1p2 + static bool referenceTypesAreCompatible(QualType, QualType); // C++ 5.17p6 + static bool functionTypesAreCompatible(QualType, QualType); // C99 6.7.5.3p15 + static bool arrayTypesAreCompatible(QualType, QualType); // C99 6.7.5.2p6 +private: + QualType getCanonicalTypeInternal() const { return CanonicalType; } + friend class QualType; +public: + virtual void getAsStringInternal(std::string &InnerString) const = 0; + + static bool classof(const Type *) { return true; } +}; + +/// BuiltinType - This class is used for builtin types like 'int'. Builtin +/// types are always canonical and have a literal name field. +class BuiltinType : public Type { +public: + enum Kind { + Void, + + Bool, // This is bool and/or _Bool. + Char_U, // This is 'char' for targets where char is unsigned. + UChar, // This is explicitly qualified unsigned char. + UShort, + UInt, + ULong, + ULongLong, + + Char_S, // This is 'char' for targets where char is signed. + SChar, // This is explicitly qualified signed char. + Short, + Int, + Long, + LongLong, + + Float, Double, LongDouble + }; +private: + Kind TypeKind; +public: + BuiltinType(Kind K) : Type(Builtin, QualType()), TypeKind(K) {} + + Kind getKind() const { return TypeKind; } + const char *getName() const; + + // the number of bits to represent the builtin type. + unsigned getSize() const; + + virtual void getAsStringInternal(std::string &InnerString) const; + + static bool classof(const Type *T) { return T->getTypeClass() == Builtin; } + static bool classof(const BuiltinType *) { return true; } +}; + +/// ComplexType - C99 6.2.5p11 - Complex values. This supports the C99 complex +/// types (_Complex float etc) as well as the GCC integer complex extensions. 
+/// +class ComplexType : public Type, public llvm::FoldingSetNode { + /// ElementType - The element type of this complex type. + QualType ElementType; + ComplexType(QualType Element, QualType CanonicalPtr) : + Type(Complex, CanonicalPtr), ElementType(Element) { + } + friend class ASTContext; // ASTContext creates these. +public: + QualType getElementType() const { return ElementType; } + + virtual void getAsStringInternal(std::string &InnerString) const; + + + /// Profile - Describe this node to a FoldingSetNodeID. A ComplexType is + /// identified solely by the opaque pointer of its element type. + void Profile(llvm::FoldingSetNodeID &ID) { + Profile(ID, getElementType()); + } + static void Profile(llvm::FoldingSetNodeID &ID, QualType Element) { + ID.AddPointer(Element.getAsOpaquePtr()); + } + + static bool classof(const Type *T) { return T->getTypeClass() == Complex; } + static bool classof(const ComplexType *) { return true; } +}; + + +/// PointerType - C99 6.7.5.1 - Pointer Declarators. +/// +class PointerType : public Type, public llvm::FoldingSetNode { + /// PointeeType - The type this pointer points to. + QualType PointeeType; + PointerType(QualType Pointee, QualType CanonicalPtr) : + Type(Pointer, CanonicalPtr), PointeeType(Pointee) { + } + friend class ASTContext; // ASTContext creates these. +public: + + QualType getPointeeType() const { return PointeeType; } + + virtual void getAsStringInternal(std::string &InnerString) const; + + + /// Profile - Describe this node to a FoldingSetNodeID. A PointerType is + /// identified solely by the opaque pointer of its pointee type. + void Profile(llvm::FoldingSetNodeID &ID) { + Profile(ID, getPointeeType()); + } + static void Profile(llvm::FoldingSetNodeID &ID, QualType Pointee) { + ID.AddPointer(Pointee.getAsOpaquePtr()); + } + + static bool classof(const Type *T) { return T->getTypeClass() == Pointer; } + static bool classof(const PointerType *) { return true; } +}; + +/// ReferenceType - C++ 8.3.2 - Reference Declarators. +/// +class ReferenceType : public Type, public llvm::FoldingSetNode { + /// ReferenceeType - The type being referred to. + QualType ReferenceeType; + ReferenceType(QualType Referencee, QualType CanonicalRef) : + Type(Reference, CanonicalRef), ReferenceeType(Referencee) { + } + friend class ASTContext; // ASTContext creates these.
+public: + virtual void getAsStringInternal(std::string &InnerString) const; + + QualType getReferenceeType() const { return ReferenceeType; } + + void Profile(llvm::FoldingSetNodeID &ID) { + Profile(ID, getReferenceeType()); + } + static void Profile(llvm::FoldingSetNodeID &ID, QualType Referencee) { + ID.AddPointer(Referencee.getAsOpaquePtr()); + } + + static bool classof(const Type *T) { return T->getTypeClass() == Reference; } + static bool classof(const ReferenceType *) { return true; } +}; + +/// ArrayType - C99 6.7.5.2 - Array Declarators. +/// +class ArrayType : public Type, public llvm::FoldingSetNode { +public: + /// ArraySizeModifier - Capture whether this is a normal array (e.g. int X[4]), + /// an array with a static size (e.g. int X[static 4]), or with a star size + /// (e.g. int X[*]). + enum ArraySizeModifier { + Normal, Static, Star + }; +private: + /// NOTE: These fields are packed into the bitfields space in the Type class. + /// + /// SizeModifier - Holds an ArraySizeModifier value. It is declared 'unsigned' + /// rather than with the enum type because a compiler that gives the enum a + /// signed underlying type (e.g. MSVC) would make a 2-bit enum bit-field + /// signed, and Star (2) would then not read back correctly. + unsigned SizeModifier : 2; + + /// IndexTypeQuals - Capture qualifiers in declarations like: + /// 'int X[static restrict 4]'. + unsigned IndexTypeQuals : 3; + + /// ElementType - The element type of the array. + QualType ElementType; + + /// SizeExpr - The size is either a constant or assignment expression (for + /// Variable Length Arrays). VLAs are only permitted within a function block. + Expr *SizeExpr; + + ArrayType(QualType et, ArraySizeModifier sm, unsigned tq, QualType can, + Expr *e) + : Type(Array, can), SizeModifier(sm), IndexTypeQuals(tq), ElementType(et), + SizeExpr(e) {} + friend class ASTContext; // ASTContext creates these.
+public: + + QualType getElementType() const { return ElementType; } + /// getSizeModifier - Return the size modifier, converting the unsigned + /// bit-field back to the enum type. + ArraySizeModifier getSizeModifier() const { + return static_cast<ArraySizeModifier>(SizeModifier); + } + unsigned getIndexTypeQualifier() const { return IndexTypeQuals; } + Expr *getSize() const { return SizeExpr; } + + virtual void getAsStringInternal(std::string &InnerString) const; + + /// Profile - Describe this node to a FoldingSetNodeID. An ArrayType is + /// identified by its size modifier, index type qualifiers, element type + /// (opaque pointer) and size expression pointer. + void Profile(llvm::FoldingSetNodeID &ID) { + Profile(ID, getSizeModifier(), getIndexTypeQualifier(), getElementType(), + getSize()); + } + static void Profile(llvm::FoldingSetNodeID &ID, + ArraySizeModifier SizeModifier, + unsigned IndexTypeQuals, QualType ElementType, + Expr *SizeExpr) { + ID.AddInteger(SizeModifier); + ID.AddInteger(IndexTypeQuals); + ID.AddPointer(ElementType.getAsOpaquePtr()); + ID.AddPointer(SizeExpr); + } + + static bool classof(const Type *T) { return T->getTypeClass() == Array; } + static bool classof(const ArrayType *) { return true; } +}; + +/// VectorType - Represents a GCC vector type: NumElements elements of type +/// ElementType. +/// +class VectorType : public Type, public llvm::FoldingSetNode { + /// ElementType - The element type of the vector. + QualType ElementType; + + /// NumElements - The number of elements in the vector. + unsigned NumElements; + + VectorType(QualType vecType, unsigned vectorSize, QualType canonType) : + Type(Vector, canonType), ElementType(vecType), NumElements(vectorSize) {} + friend class ASTContext; // ASTContext creates these.
+public: + + QualType getElementType() const { return ElementType; } + unsigned getNumElements() const { return NumElements; } + + virtual void getAsStringInternal(std::string &InnerString) const; + + /// Profile - Describe this node to a FoldingSetNodeID. A VectorType is + /// identified by its element type's opaque pointer and its element count. + void Profile(llvm::FoldingSetNodeID &ID) { + Profile(ID, getElementType(), getNumElements()); + } + static void Profile(llvm::FoldingSetNodeID &ID, + QualType ElementType, unsigned NumElements) { + ID.AddPointer(ElementType.getAsOpaquePtr()); + ID.AddInteger(NumElements); + } + static bool classof(const Type *T) { return T->getTypeClass() == Vector; } + static bool classof(const VectorType *) { return true; } +}; + +/// FunctionType - C99 6.7.5.3 - Function Declarators. This is the common base +/// class of FunctionTypeNoProto and FunctionTypeProto. +/// +class FunctionType : public Type { + /// SubClassData - This field is owned by the subclass, put here to pack + /// tightly with the ivars in Type. FunctionTypeProto stores its variadic + /// flag here; FunctionTypeNoProto always passes false. + bool SubClassData : 1; + + /// ResultType - The type returned by the function. + QualType ResultType; +protected: + FunctionType(TypeClass tc, QualType res, bool SubclassInfo,QualType Canonical) + : Type(tc, Canonical), SubClassData(SubclassInfo), ResultType(res) {} + bool getSubClassData() const { return SubClassData; } +public: + + QualType getResultType() const { return ResultType; } + + + /// classof - Both the FunctionNoProto and FunctionProto type classes are + /// FunctionTypes. + static bool classof(const Type *T) { + return T->getTypeClass() == FunctionNoProto || + T->getTypeClass() == FunctionProto; + } + static bool classof(const FunctionType *) { return true; } +}; + +/// FunctionTypeNoProto - Represents a K&R-style 'int foo()' function, which has +/// no information available about its arguments. +class FunctionTypeNoProto : public FunctionType, public llvm::FoldingSetNode { + FunctionTypeNoProto(QualType Result, QualType Canonical) + : FunctionType(FunctionNoProto, Result, false, Canonical) {} + friend class ASTContext; // ASTContext creates these. +public: + // No additional state past what FunctionType provides.
+ + virtual void getAsStringInternal(std::string &InnerString) const; + + /// Profile - Describe this node to a FoldingSetNodeID. A FunctionTypeNoProto + /// is identified solely by the opaque pointer of its result type. + void Profile(llvm::FoldingSetNodeID &ID) { + Profile(ID, getResultType()); + } + static void Profile(llvm::FoldingSetNodeID &ID, QualType ResultType) { + ID.AddPointer(ResultType.getAsOpaquePtr()); + } + + static bool classof(const Type *T) { + return T->getTypeClass() == FunctionNoProto; + } + static bool classof(const FunctionTypeNoProto *) { return true; } +}; + +/// FunctionTypeProto - Represents a prototype with argument type info, e.g. +/// 'int foo(int)' or 'int foo(void)'. 'void' is represented as having no +/// arguments, not as having a single void argument. +class FunctionTypeProto : public FunctionType, public llvm::FoldingSetNode { + /// Copies the numArgs entries of ArgArray into the trailing ArgInfo array and + /// records isVariadic in the FunctionType subclass-data bit. + /// NOTE(review): ArgInfo is declared with a single element, so the object + /// presumably has to be allocated with room for numArgs entries (the + /// ASTContext destructor notes these objects are malloc'd) - confirm at the + /// allocation site. + FunctionTypeProto(QualType Result, QualType *ArgArray, unsigned numArgs, + bool isVariadic, QualType Canonical) + : FunctionType(FunctionProto, Result, isVariadic, Canonical), + NumArgs(numArgs) { + for (unsigned i = 0; i != numArgs; ++i) + ArgInfo[i] = ArgArray[i]; + } + + /// NumArgs - The number of arguments this function has, not counting '...'. + unsigned NumArgs; + + /// ArgInfo - This array holds the argument types. Note that this is actually + /// a variable-sized array, so it must be the last instance variable in the + /// class. + QualType ArgInfo[1]; + friend class ASTContext; // ASTContext creates these.
+public: + unsigned getNumArgs() const { return NumArgs; } + /// getArgType - Return the type of argument number i; i must be less than + /// getNumArgs(). + QualType getArgType(unsigned i) const { + assert(i < NumArgs && "Invalid argument number!"); + return ArgInfo[i]; + } + + /// isVariadic - Return the variadic flag, which is kept in the FunctionType + /// subclass-data bit. + bool isVariadic() const { return getSubClassData(); } + + /// arg_type_begin/arg_type_end - Iterate over the NumArgs argument types. + typedef const QualType *arg_type_iterator; + arg_type_iterator arg_type_begin() const { return ArgInfo; } + arg_type_iterator arg_type_end() const { return ArgInfo+NumArgs; } + + virtual void getAsStringInternal(std::string &InnerString) const; + + static bool classof(const Type *T) { + return T->getTypeClass() == FunctionProto; + } + static bool classof(const FunctionTypeProto *) { return true; } + + void Profile(llvm::FoldingSetNodeID &ID); + static void Profile(llvm::FoldingSetNodeID &ID, QualType Result, + QualType* ArgTys, unsigned NumArgs, bool isVariadic); +}; + + +/// TypedefType - A type that records the TypedefDecl it was created for. Its +/// canonical type must not itself be a TypedefType (asserted in the +/// constructor). +class TypedefType : public Type { + TypedefDecl *Decl; + TypedefType(TypedefDecl *D, QualType can) : Type(TypeName, can), Decl(D) { + assert(!isa<TypedefType>(can) && "Invalid canonical type"); + } + friend class ASTContext; // ASTContext creates these. +public: + + TypedefDecl *getDecl() const { return Decl; } + + virtual void getAsStringInternal(std::string &InnerString) const; + + static bool classof(const Type *T) { return T->getTypeClass() == TypeName; } + static bool classof(const TypedefType *) { return true; } +}; + + +/// TagType - A type that records the TagDecl (struct, union, class or enum +/// declaration) it was created for. +class TagType : public Type { + TagDecl *Decl; + TagType(TagDecl *D, QualType can) : Type(Tagged, can), Decl(D) {} + friend class ASTContext; // ASTContext creates these. +public: + + TagDecl *getDecl() const { return Decl; } + + virtual void getAsStringInternal(std::string &InnerString) const; + + static bool classof(const Type *T) { return T->getTypeClass() == Tagged; } + static bool classof(const TagType *) { return true; } +}; + +/// RecordType - This is a helper class that allows the use of isa/cast/dyn_cast +/// to detect TagType objects of structs/unions/classes.
+class RecordType : public TagType { + RecordType(); // DO NOT IMPLEMENT +public: + + /// getDecl - Return the declaration as a RecordDecl. This hides the + /// non-virtual TagType::getDecl, which returns a TagDecl*. + /// NOTE(review): reinterpret_cast is presumably used because RecordDecl is + /// only forward-declared at this point - confirm. + RecordDecl *getDecl() const { + return reinterpret_cast<RecordDecl*>(TagType::getDecl()); + } + // FIXME: This predicate is a helper to QualType/Type. It needs to + // recursively check all fields for const-ness. If any field is declared + // const, it should return true. (NOTE(review): this note used to say + // 'false', which contradicts the predicate's name - confirm.) Until that is + // implemented it always returns false. + bool hasConstFields() const { return false; } + + static bool classof(const Type *T); + static bool classof(const RecordType *) { return true; } +}; + + +/// ... + +// TODO: When we support C++, we should have types for uses of template with +// default parameters. We should be able to distinguish source use of +// 'std::vector<int>' from 'std::vector<int, std::allocator<int> >'. Though they +// specify the same type, we want to print the default argument only if +// specified in the source code. + +/// getCanonicalType - Return the canonical version of this type, with the +/// appropriate type qualifiers on it. The result's qualifiers are the bitwise +/// OR of this QualType's qualifiers and those on the type's canonical type. +inline QualType QualType::getCanonicalType() const { + return QualType(getTypePtr()->getCanonicalTypeInternal().getTypePtr(), + getQualifiers() | + getTypePtr()->getCanonicalTypeInternal().getQualifiers()); +} + +} // end namespace clang + +#endif diff --git a/include/clang/Basic/Diagnostic.h b/include/clang/Basic/Diagnostic.h new file mode 100644 index 0000000000..07d38f2d8d --- /dev/null +++ b/include/clang/Basic/Diagnostic.h @@ -0,0 +1,158 @@ +//===--- Diagnostic.h - C Language Family Diagnostic Handling ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Diagnostic-related interfaces.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_DIAGNOSTIC_H +#define LLVM_CLANG_DIAGNOSTIC_H + +#include <string> + +namespace clang { + class DiagnosticClient; + class SourceLocation; + class SourceRange; + + // Import the diagnostic enums themselves. + namespace diag { + /// diag::kind - All of the diagnostics that can be emitted by the frontend. + enum kind { +#define DIAG(ENUM,FLAGS,DESC) ENUM, +#include "DiagnosticKinds.def" + NUM_DIAGNOSTICS + }; + + /// Enum values that allow the client to map NOTEs, WARNINGs, and EXTENSIONs + /// to either MAP_IGNORE (nothing), MAP_WARNING (emit a warning), MAP_ERROR + /// (emit as an error), or MAP_DEFAULT (handle the default way). + enum Mapping { + MAP_DEFAULT = 0, //< Do not map this diagnostic. + MAP_IGNORE = 1, //< Map this diagnostic to nothing, ignore it. + MAP_WARNING = 2, //< Map this diagnostic to a warning. + MAP_ERROR = 3 //< Map this diagnostic to an error. + }; + } + +/// Diagnostic - This concrete class is used by the front-end to report +/// problems and issues. It massages the diagnostics (e.g. handling things like +/// "report warnings as errors" and passes them off to the DiagnosticClient for +/// reporting to the user. +class Diagnostic { + bool WarningsAsErrors; // Treat warnings like errors: + bool WarnOnExtensions; // Enables warnings for gcc extensions: -pedantic. + bool ErrorOnExtensions; // Error on extensions: -pedantic-errors. + DiagnosticClient &Client; + + /// DiagMappings - Mapping information for diagnostics. Mapping info is + /// packed into two bits per diagnostic. + unsigned char DiagMappings[(diag::NUM_DIAGNOSTICS+3)/4]; + + /// ErrorOccurred - This is set to true when an error is emitted, and is + /// sticky. 
+ bool ErrorOccurred; + + unsigned NumDiagnostics; // Number of diagnostics reported + unsigned NumErrors; // Number of diagnostics that are errors +public: + explicit Diagnostic(DiagnosticClient &client); + + //===--------------------------------------------------------------------===// + // Diagnostic characterization methods, used by a client to customize how + // diagnostics are classified and reported. + // + /// getClient - Return the client that diagnostics are handed to. + const DiagnosticClient &getClient() const { return Client; } + + /// setWarningsAsErrors - When set to true, any warnings reported are issued + /// as errors. + void setWarningsAsErrors(bool Val) { WarningsAsErrors = Val; } + bool getWarningsAsErrors() const { return WarningsAsErrors; } + + /// setWarnOnExtensions - When set to true, issue warnings on GCC extensions, + /// the equivalent of GCC's -pedantic. + void setWarnOnExtensions(bool Val) { WarnOnExtensions = Val; } + bool getWarnOnExtensions() const { return WarnOnExtensions; } + + /// setErrorOnExtensions - When set to true issue errors for GCC extensions + /// instead of warnings. This is the equivalent to GCC's -pedantic-errors. + void setErrorOnExtensions(bool Val) { ErrorOnExtensions = Val; } + bool getErrorOnExtensions() const { return ErrorOnExtensions; } + + /// setDiagnosticMapping - This allows the client to specify how certain + /// diagnostics are mapped (for example, that certain warnings are ignored). + /// Only NOTEs, WARNINGs, and EXTENSIONs can be mapped. + void setDiagnosticMapping(diag::kind Diag, diag::Mapping Map) { + assert(isNoteWarningOrExtension(Diag) && "Cannot map errors!"); + // Four 2-bit mappings are stored per byte: select the byte, then the bit + // offset of this diagnostic within it. + unsigned char &Slot = DiagMappings[Diag/4]; + unsigned Bits = (Diag & 3)*2; + // Clear the old 2-bit value, then store the new one. + Slot &= ~(3 << Bits); + Slot |= Map << Bits; + } + + /// getDiagnosticMapping - Return the mapping currently set for the specified + /// diagnostic.
+ diag::Mapping getDiagnosticMapping(diag::kind Diag) const { + // Byte Diag/4 holds four 2-bit mappings; shift this diagnostic's field down + // and mask it out. + return (diag::Mapping)((DiagMappings[Diag/4] >> (Diag & 3)*2) & 3); + } + + /// hasErrorOccurred - Return the ErrorOccurred flag. + bool hasErrorOccurred() const { return ErrorOccurred; } + + unsigned getNumErrors() const { return NumErrors; } + unsigned getNumDiagnostics() const { return NumDiagnostics; } + + //===--------------------------------------------------------------------===// + // Diagnostic classification and reporting interfaces. + // + + /// getDescription - Given a diagnostic ID, return a description of the + /// issue. + static const char *getDescription(unsigned DiagID); + + /// Level - The level of the diagnostic. + enum Level { + Ignored, Note, Warning, Error, Fatal, Sorry + }; + + /// isNoteWarningOrExtension - Return true if the unmapped diagnostic level of + /// the specified diagnostic ID is a Note, Warning, or Extension. + static bool isNoteWarningOrExtension(unsigned DiagID); + + /// getDiagnosticLevel - Based on the way the client configured the Diagnostic + /// object, classify the specified diagnostic ID into a Level, consumable by + /// the DiagnosticClient. + Level getDiagnosticLevel(unsigned DiagID) const; + + /// Report - Issue the message to the client. DiagID is a member of the + /// diag::kind enum. Strs/NumStrs and Ranges/NumRanges are optional arrays + /// with their element counts; both default to empty. + /// NOTE(review): Strs presumably supplies the %0, %1, ... substitutions that + /// appear in the diagnostic descriptions - confirm in Report's definition. + void Report(SourceLocation Pos, unsigned DiagID, + const std::string *Strs = 0, unsigned NumStrs = 0, + const SourceRange *Ranges = 0, unsigned NumRanges = 0); +}; + +/// DiagnosticClient - This is an abstract interface implemented by clients of +/// the front-end, which formats and prints fully processed diagnostics. +class DiagnosticClient { +public: + virtual ~DiagnosticClient(); + + /// IgnoreDiagnostic - If the client wants to ignore this diagnostic, then + /// return true. + virtual bool IgnoreDiagnostic(Diagnostic::Level DiagLevel, + SourceLocation Pos) = 0; + + /// HandleDiagnostic - Handle this diagnostic, reporting it to the user or + /// capturing it to a log as needed.
+ virtual void HandleDiagnostic(Diagnostic::Level DiagLevel, SourceLocation Pos, + diag::kind ID, const std::string *Strs, + unsigned NumStrs, const SourceRange *Ranges, + unsigned NumRanges) = 0; +}; + +} // end namespace clang + +#endif diff --git a/include/clang/Basic/DiagnosticKinds.def b/include/clang/Basic/DiagnosticKinds.def new file mode 100644 index 0000000000..8addaa493c --- /dev/null +++ b/include/clang/Basic/DiagnosticKinds.def @@ -0,0 +1,668 @@ +//===-- DiagnosticKinds.def - C Family Diagnostic Kind Database -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the DiagnosticKind database. +// +//===----------------------------------------------------------------------===// + +// Flags for diagnostic: +// +// DIAG_TYPE - Allows one of: +// NOTE - Informational message. +// WARNING - Warning. +// EXTENSION - Notification that an extension to the language is being used. +// ERROR - Error, compilation will stop after parsing completes. +// FATAL - Fatal error: parsing must stop. 
+ +//===----------------------------------------------------------------------===// +// Portability +//===----------------------------------------------------------------------===// + +DIAG(port_target_macro_use, NOTE, + "use of a target-specific macro, source is not 'portable'") + +DIAG(port_target_builtin_use, NOTE, + "use of a target-specific builtin function, source is not 'portable'") + +DIAG(port_wchar_t, NOTE, + "sizeof(wchar_t) varies between targets, source is not 'portable'") + +//===----------------------------------------------------------------------===// +// Lexer Diagnostics +//===----------------------------------------------------------------------===// + +DIAG(null_in_string, WARNING, + "null character(s) preserved in string literal") +DIAG(null_in_char , WARNING, + "null character(s) preserved in character literal") +DIAG(null_in_file , WARNING, + "null character ignored") +DIAG(nested_block_comment, WARNING, + "\"/*\" within block comment") +DIAG(escaped_newline_block_comment_end, WARNING, + "escaped newline between */ characters at block comment end") +DIAG(backslash_newline_space, WARNING, + "backslash and newline separated by space") + +// Trigraphs. 
+DIAG(trigraph_ignored, WARNING, "trigraph ignored") +DIAG(trigraph_ignored_block_comment, WARNING, + "ignored trigraph would end block comment") +DIAG(trigraph_ends_block_comment, WARNING, + "trigraph ends block comment") +DIAG(trigraph_converted, WARNING, + "trigraph converted to '%0' character") + +DIAG(ext_multi_line_bcpl_comment, EXTENSION, + "multi-line // comment") +DIAG(ext_bcpl_comment, EXTENSION, + "// comments are not allowed in this language") +DIAG(ext_no_newline_eof, EXTENSION, + "no newline at end of file") +DIAG(ext_backslash_newline_eof, EXTENSION, + "backslash-newline at end of file") +DIAG(ext_dollar_in_identifier, EXTENSION, + "'$' in identifier") +DIAG(charize_microsoft_ext, EXTENSION, + "@# is a microsoft extension") + +DIAG(ext_token_used, EXTENSION, + "extension used") + +DIAG(err_unterminated_string, ERROR, + "missing terminating \" character") +DIAG(err_unterminated_char, ERROR, + "missing terminating ' character") +DIAG(err_empty_character, ERROR, + "empty character constant") +DIAG(err_unterminated_block_comment, ERROR, + "unterminated /* comment") +DIAG(err_invalid_character_to_charify, ERROR, + "invalid argument to convert to character") + +//===----------------------------------------------------------------------===// +// Preprocessor Diagnostics +//===----------------------------------------------------------------------===// + +DIAG(pp_hash_warning, WARNING, + "#warning%0") +DIAG(pp_include_next_in_primary, WARNING, + "#include_next in primary source file") +DIAG(pp_include_next_absolute_path, WARNING, + "#include_next with absolute path") +DIAG(ext_c99_whitespace_required_after_macro_name, WARNING, + "ISO C99 requires whitespace after the macro name") +DIAG(pp_pragma_once_in_main_file, WARNING, + "#pragma once in main file") +DIAG(pp_pragma_sysheader_in_main_file, WARNING, + "#pragma system_header ignored in main file") +DIAG(pp_poisoning_existing_macro, WARNING, + "poisoning existing macro") +DIAG(pp_out_of_date_dependency, 
WARNING, + "current file is older than dependency %0") +DIAG(pp_undef_builtin_macro, WARNING, + "undefining builtin macro") +DIAG(pp_redef_builtin_macro, WARNING, + "redefining builtin macro") +DIAG(pp_macro_not_used, WARNING, // -Wunused-macros + "macro is not used") +DIAG(pp_invalid_string_literal, WARNING, + "invalid string literal, ignoring final '\\'") +DIAG(warn_pp_expr_overflow, WARNING, + "integer overflow in preprocessor expression") +DIAG(warn_pp_convert_lhs_to_positive, WARNING, + "left side of operator converted from negative value to unsigned: %0") +DIAG(warn_pp_convert_rhs_to_positive, WARNING, + "right side of operator converted from negative value to unsigned: %0") + +DIAG(ext_pp_import_directive, EXTENSION, + "#import is a language extension") +DIAG(ext_pp_ident_directive, EXTENSION, + "#ident is a language extension") +DIAG(ext_pp_include_next_directive, EXTENSION, + "#include_next is a language extension") +DIAG(ext_pp_warning_directive, EXTENSION, + "#warning is a language extension") +DIAG(ext_pp_extra_tokens_at_eol, EXTENSION, + "extra tokens at end of %0 directive") +DIAG(ext_pp_comma_expr, EXTENSION, + "comma operator in operand of #if") +DIAG(ext_pp_bad_vaargs_use, EXTENSION, + "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro") +DIAG(ext_pp_macro_redef, EXTENSION, + "\"%0\" macro redefined") +DIAG(ext_pp_macro_redef2, EXTENSION, + "this is previous definition") +DIAG(ext_variadic_macro, EXTENSION, + "variadic macros were introduced in C99") +DIAG(ext_named_variadic_macro, EXTENSION, + "named variadic macros are a GNU extension") +DIAG(ext_embedded_directive, EXTENSION, + "embedding a directive within macro arguments is not portable") +DIAG(ext_missing_varargs_arg, EXTENSION, + "varargs argument missing, but tolerated as an extension") +DIAG(ext_empty_fnmacro_arg, EXTENSION, + "empty macro arguments were standardized in C99") + +DIAG(ext_pp_base_file, EXTENSION, + "__BASE_FILE__ is a language extension") 
+DIAG(ext_pp_include_level, EXTENSION, + "__INCLUDE_LEVEL__ is a language extension") +DIAG(ext_pp_timestamp, EXTENSION, + "__TIMESTAMP__ is a language extension") + +DIAG(err_pp_invalid_directive, ERROR, + "invalid preprocessing directive") +DIAG(err_pp_hash_error, ERROR, + "#error%0") +DIAG(err_pp_file_not_found, ERROR, + "'%0' file not found") +DIAG(err_pp_empty_filename, ERROR, + "empty filename") +DIAG(err_pp_include_too_deep, ERROR, + "#include nested too deeply") +DIAG(err_pp_expects_filename, ERROR, + "expected \"FILENAME\" or <FILENAME>") +DIAG(err_pp_macro_not_identifier, ERROR, + "macro names must be identifiers") +DIAG(err_pp_missing_macro_name, ERROR, + "macro name missing") +DIAG(err_pp_missing_rparen_in_macro_def, ERROR, + "missing ')' in macro parameter list") +DIAG(err_pp_invalid_tok_in_arg_list, ERROR, + "invalid token in macro parameter list") +DIAG(err_pp_expected_ident_in_arg_list, ERROR, + "expected identifier in macro parameter list") +DIAG(err_pp_expected_comma_in_arg_list, ERROR, + "expected comma in macro parameter list") +DIAG(err_pp_duplicate_name_in_arg_list, ERROR, + "duplicate macro parameter name \"%0\"") +DIAG(err_pp_stringize_not_parameter, ERROR, + "'#' is not followed by a macro parameter") +DIAG(err_pp_malformed_ident, ERROR, + "invalid #ident directive") +DIAG(err_pp_unterminated_conditional, ERROR, + "unterminated conditional directive") +DIAG(pp_err_else_after_else, ERROR, + "#else after #else") +DIAG(pp_err_elif_after_else, ERROR, + "#elif after #else") +DIAG(pp_err_else_without_if, ERROR, + "#else without #if") +DIAG(pp_err_elif_without_if, ERROR, + "#elif without #if") +DIAG(err_pp_endif_without_if, ERROR, + "#endif without #if") +DIAG(err_pp_expected_value_in_expr, ERROR, + "expected value in expression") +DIAG(err_pp_missing_val_before_operator, ERROR, + "missing value before operator") +DIAG(err_pp_expected_rparen, ERROR, + "expected ')' in preprocessor expression") +DIAG(err_pp_expected_eol, ERROR, + "expected end of 
line in preprocessor expression") +DIAG(err_pp_defined_requires_identifier, ERROR, + "operator \"defined\" requires an identifier") +DIAG(err_pp_missing_rparen, ERROR, + "missing ')' after \"defined\"") +DIAG(err_pp_colon_without_question, ERROR, + "':' without preceding '?'") +DIAG(err_pp_question_without_colon, ERROR, + "'?' without following ':'") +DIAG(err_pp_division_by_zero, ERROR, + "division by zero in preprocessor expression") +DIAG(err_pp_remainder_by_zero, ERROR, + "remainder by zero in preprocessor expression") +DIAG(err_pp_expr_bad_token, ERROR, + "token is not valid in preprocessor expressions") +DIAG(err_pp_invalid_poison, ERROR, + "can only poison identifier tokens") +DIAG(err_pp_used_poisoned_id, ERROR, + "attempt to use a poisoned identifier") +DIAG(err__Pragma_malformed, ERROR, + "_Pragma takes a parenthesized string literal") +DIAG(err_defined_macro_name, ERROR, + "\"defined\" cannot be used as a macro name") +DIAG(err_paste_at_start, ERROR, + "\"##\" cannot appear at start of macro expansion") +DIAG(err_paste_at_end, ERROR, + "\"##\" cannot appear at end of macro expansion") +DIAG(err_unterm_macro_invoc, ERROR, + "unterminated function-like macro invocation") +DIAG(err_too_many_args_in_macro_invoc, ERROR, + "too many arguments provided to function-like macro invocation") +DIAG(err_too_few_args_in_macro_invoc, ERROR, + "too few arguments provided to function-like macro invocation") +DIAG(err_pp_bad_paste, ERROR, + "pasting formed \"%0\", an invalid preprocessing token") +DIAG(err_pp_operator_used_as_macro_name, ERROR, + "C++ operator \"%0\" cannot be used as a macro name") +DIAG(err_pp_illegal_floating_literal, ERROR, + "floating point literal in preprocessor expression") + +// Should be a sorry? 
+DIAG(err_pp_I_dash_not_supported, ERROR, + "-I- not supported, please use -iquote instead") + +//===----------------------------------------------------------------------===// +// Parser Diagnostics +//===----------------------------------------------------------------------===// + +DIAG(w_type_defaults_to_int, WARNING, + "type defaults to 'int'") +DIAG(w_no_declarators, WARNING, + "declaration does not declare anything") +DIAG(w_asm_qualifier_ignored, WARNING, + "ignored %0 qualifier on asm") + +DIAG(ext_empty_source_file, EXTENSION, + "ISO C forbids an empty source file") +DIAG(ext_top_level_semi, EXTENSION, + "ISO C does not allow an extra ';' outside of a function") +DIAG(ext_extra_struct_semi, EXTENSION, + "ISO C does not allow an extra ';' inside a struct or union") +DIAG(ext_duplicate_declspec, EXTENSION, + "duplicate '%0' declaration specifier") +DIAG(ext_plain_complex, EXTENSION, + "ISO C does not support plain '_Complex' meaning '_Complex double'") +DIAG(ext_integer_complex, EXTENSION, + "ISO C does not support complex integer types") +DIAG(ext_thread_before, EXTENSION, + "'__thread' before 'static'") + +DIAG(ext_empty_struct_union_enum, EXTENSION, + "use of empty %0 extension") + +DIAG(ext_ident_list_in_param, EXTENSION, + "type-less parameter names in function declaration") +DIAG(ext_c99_array_usage, EXTENSION, + "use of C99-specific array features") +DIAG(ext_c99_variable_decl_in_for_loop, EXTENSION, + "variable declaration in for loop is a C99-specific feature") +DIAG(ext_c99_compound_literal, EXTENSION, + "compound literals are a C99-specific feature") +DIAG(ext_c99_enumerator_list_comma, EXTENSION, + "commas at the end of enumerator lists are a C99-specific feature") + +DIAG(ext_gnu_indirect_goto, EXTENSION, + "use of GNU indirect-goto extension") +DIAG(ext_gnu_address_of_label, EXTENSION, + "use of GNU address-of-label extension") +DIAG(ext_gnu_statement_expr, EXTENSION, + "use of GNU statement expression extension") 
+DIAG(ext_gnu_conditional_expr, EXTENSION, + "use of GNU ?: expression extension, eliding middle term") +DIAG(ext_gnu_empty_initializer, EXTENSION, + "use of GNU empty initializer extension") +DIAG(ext_gnu_array_range, EXTENSION, + "use of GNU array range extension") +DIAG(ext_gnu_missing_equal_designator, EXTENSION, + "use of GNU 'missing =' extension in designator") +DIAG(ext_gnu_old_style_field_designator, EXTENSION, + "use of GNU old-style field designator extension") +DIAG(ext_gnu_case_range, EXTENSION, + "use of GNU case range extension") + +// Generic errors. +DIAG(err_parse_error, ERROR, + "parse error") +DIAG(err_expected_expression, ERROR, + "expected expression") +DIAG(err_expected_external_declaration, ERROR, + "expected external declaration") +DIAG(err_expected_ident, ERROR, + "expected identifier") +DIAG(err_expected_ident_lparen, ERROR, + "expected identifier or '('") +DIAG(err_expected_ident_lbrace, ERROR, + "expected identifier or '{'") +DIAG(err_expected_rparen, ERROR, + "expected ')'") +DIAG(err_expected_rsquare, ERROR, + "expected ']'") +DIAG(err_expected_rbrace, ERROR, + "expected '}'") +DIAG(err_expected_greater, ERROR, + "expected '>'") +DIAG(err_expected_semi_decl_list, ERROR, + "expected ';' at end of declaration list") +DIAG(ext_expected_semi_decl_list, EXTENSION, + "expected ';' at end of declaration list") +DIAG(err_expected_fn_body, ERROR, + "expected function body after function declarator") +DIAG(err_expected_after_declarator, ERROR, + "expected '=', ',', ';', 'asm', or '__attribute__' after declarator") +DIAG(err_expected_statement, ERROR, + "expected statement") +DIAG(err_expected_lparen_after, ERROR, + "expected '(' after '%0'") +DIAG(err_expected_less_after, ERROR, + "expected '<' after '%0'") +DIAG(err_expected_comma, ERROR, + "expected ','") +DIAG(err_expected_lbrace_in_compound_literal, ERROR, + "expected '{' in compound literal") +DIAG(err_expected_while, ERROR, + "expected 'while' in do/while loop") 
+DIAG(err_expected_semi_after, ERROR, + "expected ';' after %0") +DIAG(err_expected_semi_after_expr, ERROR, + "expected ';' after expression") +DIAG(err_expected_semi_for, ERROR, + "expected ';' in 'for' statement specifier") +DIAG(err_expected_colon_after, ERROR, + "expected ':' after %0") +DIAG(err_label_end_of_compound_statement, ERROR, + "label at end of compound statement: expected statement") +DIAG(err_expected_colon, ERROR, + "expected ':'") +DIAG(err_expected_string_literal, ERROR, + "expected string literal") +DIAG(err_expected_asm_operand, ERROR, + "expected string literal or '[' for asm operand") + +DIAG(err_unexpected_at, ERROR, + "unexpected '@' in program") + +/// err_matching - this is used as a continuation of a previous error, e.g. to +/// specify the '(' when we expected a ')'. This should probably be some +/// special sort of diagnostic kind to indicate that it is the second half of +/// the previous diagnostic. +DIAG(err_matching, ERROR, + "to match this '%0'") + +//===----------------------------------------------------------------------===// +// Semantic Analysis +//===----------------------------------------------------------------------===// + +// Semantic analysis of string and character constant literals. +DIAG(ext_nonstandard_escape, EXTENSION, + "use of non-standard escape character '\\%0'") +DIAG(ext_unknown_escape, EXTENSION, + "unknown escape sequence '\\%0'") +DIAG(warn_extraneous_wide_char_constant, WARNING, + "extraneous characters in wide character constant ignored") +DIAG(warn_char_constant_too_large, WARNING, + "character constant too long for its type") +DIAG(warn_hex_escape_too_large, WARNING, + "hex escape sequence out of range") +DIAG(warn_octal_escape_too_large, WARNING, + "octal escape sequence out of range") + +DIAG(err_hex_escape_no_digits, ERROR, + "\\x used with no following hex digits") + +// Declarations. 
+DIAG(err_typename_requires_specqual, ERROR, + "type name requires a specifier or qualifier") +DIAG(err_typename_invalid_storageclass, ERROR, + "type name does not allow storage class to be specified") +DIAG(err_typename_invalid_functionspec, ERROR, + "type name does not allow function specifier to be specified") +DIAG(err_invalid_decl_spec_combination, ERROR, + "cannot combine with previous '%0' declaration specifier") +DIAG(err_invalid_sign_spec, ERROR, + "'%0' cannot be signed or unsigned") +DIAG(err_invalid_short_spec, ERROR, + "'short %0' is invalid") +DIAG(err_invalid_long_spec, ERROR, + "'long %0' is invalid") +DIAG(err_invalid_longlong_spec, ERROR, + "'long long %0' is invalid") +DIAG(err_invalid_complex_spec, ERROR, + "'_Complex %0' is invalid") +DIAG(err_invalid_thread_spec, ERROR, + "'__thread %0' is invalid") +DIAG(err_ellipsis_first_arg, ERROR, + "ISO C requires a named argument before '...'") +DIAG(err_unspecified_vla_size_with_static, ERROR, + "'static' may not be used with an unspecified variable length array size") +DIAG(err_invalid_storage_class_in_func_decl, ERROR, + "invalid storage class specifier in function declarator") +DIAG(err_invalid_reference_qualifier_application, ERROR, + "'%0' qualifier may not be applied to a reference") + +// Attributes +DIAG(err_attribute_wrong_number_arguments, ERROR, + "attribute requires %0 argument(s)") +DIAG(err_attribute_invalid_vector_type, ERROR, + "invalid vector type '%0'") +DIAG(err_attribute_vector_size_not_int, ERROR, + "vector_size requires integer constant") +DIAG(err_attribute_invalid_size, ERROR, + "vector size not an integral multiple of component size") +DIAG(err_attribute_zero_size, ERROR, + "zero vector size") +DIAG(err_typecheck_vector_not_convertable, ERROR, + "can't convert between vector values of different size ('%0' and '%1')") + +// Function Parameter Semantic Analysis. 
+DIAG(err_void_param_with_identifier, ERROR, + "void argument may not have a name") +DIAG(err_void_only_param, ERROR, + "'void' must be the first and only parameter if specified") +DIAG(err_void_param_qualified, ERROR, + "'void' as parameter must not have type qualifiers") +DIAG(err_param_redefinition, ERROR, + "redefinition of parameter '%0'") +DIAG(err_ident_list_in_fn_declaration, ERROR, + "a parameter list without types is only allowed in a function definition") +DIAG(err_declaration_does_not_declare_param, ERROR, + "declaration does not declare a parameter") +DIAG(err_no_matching_param, ERROR, + "parameter named '%0' is missing") +DIAG(ext_param_not_declared, EXTENSION, + "parameter '%0' was not declared, defaulting to type 'int'") + +DIAG(err_previous_definition, ERROR, + "previous definition is here") +DIAG(err_previous_use, ERROR, + "previous use is here") + +DIAG(err_unexpected_typedef, ERROR, + "unexpected type name '%0': expected expression") +DIAG(err_undeclared_var_use, ERROR, + "use of undeclared identifier '%0'") +DIAG(err_redefinition, ERROR, + "redefinition of '%0'") +DIAG(err_redefinition_different_kind, ERROR, + "redefinition of '%0' as different kind of symbol") +DIAG(err_nested_redefinition, ERROR, + "nested redefinition of '%0'") +DIAG(err_use_with_wrong_tag, ERROR, + "use of '%0' with tag type that does not match previous declaration") +DIAG(ext_forward_ref_enum, EXTENSION, + "ISO C forbids forward references to 'enum' types") +DIAG(err_redefinition_of_enumerator, ERROR, + "redefinition of enumerator '%0'") +DIAG(err_duplicate_member, ERROR, + "duplicate member '%0'") +DIAG(err_enum_value_not_integer_constant_expr, ERROR, + "enumerator value for '%0' is not an integer constant") +DIAG(err_case_label_not_integer_constant_expr, ERROR, + "case label does not reduce to an integer constant") +DIAG(err_typecheck_illegal_vla, ERROR, + "variable length array declared outside of any function") +DIAG(err_typecheck_negative_array_size, ERROR, + "array 
size is negative") +DIAG(err_typecheck_zero_array_size, EXTENSION, + "zero size arrays are an extension") +DIAG(err_array_size_non_int, ERROR, + "size of array has non-integer type '%0'") + +DIAG(err_redefinition_of_label, ERROR, + "redefinition of label '%0'") +DIAG(err_undeclared_label_use, ERROR, + "use of undeclared label '%0'") + +DIAG(warn_implicit_function_decl, WARNING, + "implicit declaration of function '%0'") +DIAG(ext_implicit_function_decl, EXTENSION, + "implicit declaration of function '%0' is invalid in C99") + +DIAG(err_field_declared_as_function, ERROR, + "field '%0' declared as a function") +DIAG(err_field_incomplete, ERROR, + "field '%0' has incomplete type") +DIAG(err_variable_sized_type_in_struct, EXTENSION, + "variable sized type '%0' must be at end of struct or class") +DIAG(err_flexible_array_empty_struct, ERROR, + "flexible array '%0' not allowed in otherwise empty struct") +DIAG(ext_flexible_array_in_struct, EXTENSION, + "'%0' may not be nested in a struct due to flexible array member") +DIAG(err_flexible_array_in_array, ERROR, + "'%0' may not be used as an array element due to flexible array member") +DIAG(err_illegal_decl_array_of_functions, ERROR, + "'%0' declared as array of functions") +DIAG(err_illegal_decl_array_incomplete_type, ERROR, + "array has incomplete element type '%0'") +DIAG(err_illegal_decl_array_of_references, ERROR, + "'%0' declared as array of references") +DIAG(err_illegal_decl_pointer_to_reference, ERROR, + "'%0' declared as a pointer to a reference") +DIAG(err_illegal_decl_reference_to_reference, ERROR, + "'%0' declared as a reference to a reference") + +// Expressions. 
+DIAG(ext_sizeof_function_type, EXTENSION, + "invalid application of 'sizeof' to a function type") +DIAG(ext_sizeof_void_type, EXTENSION, + "invalid application of '%0' to a void type") +DIAG(err_sizeof_incomplete_type, ERROR, + "invalid application of 'sizeof' to an incomplete type '%0'") +DIAG(err_alignof_incomplete_type, ERROR, + "invalid application of '__alignof' to an incomplete type '%0'") +DIAG(err_invalid_suffix_integer_constant, ERROR, + "invalid suffix '%0' on integer constant") +DIAG(err_invalid_suffix_float_constant, ERROR, + "invalid suffix '%0' on floating constant") +DIAG(warn_integer_too_large, WARNING, + "integer constant is too large for its type") +DIAG(warn_integer_too_large_for_signed, WARNING, + "integer constant is so large that it is unsigned") +DIAG(err_exponent_has_no_digits, ERROR, + "exponent has no digits") +DIAG(ext_binary_literal, EXTENSION, + "binary integer literals are an extension") +DIAG(err_invalid_binary_digit, ERROR, + "invalid digit '%0' in binary constant") +DIAG(err_invalid_octal_digit, ERROR, + "invalid digit '%0' in octal constant") +DIAG(err_invalid_decimal_digit, ERROR, + "invalid digit '%0' in decimal constant") +DIAG(err_hexconstant_requires_exponent, ERROR, + "hexadecimal floating constants require an exponent") +DIAG(err_typecheck_subscript_value, ERROR, + "subscripted value is neither array nor pointer") +DIAG(err_typecheck_subscript, ERROR, + "array subscript is not an integer") +DIAG(err_typecheck_subscript_not_object, ERROR, + "illegal subscript of non-object type '%0'") +DIAG(err_typecheck_member_reference_structUnion, ERROR, + "member reference is not to a structure or union") +DIAG(err_typecheck_member_reference_arrow, ERROR, + "member reference is not a pointer") +DIAG(err_typecheck_incomplete_tag, ERROR, + "incomplete definition of type '%0'") +DIAG(err_typecheck_no_member, ERROR, + "no member named '%0'") +DIAG(err_typecheck_illegal_increment_decrement, ERROR, + "cannot modify value of type '%0'") 
+DIAG(err_typecheck_invalid_lvalue_incr_decr, ERROR, + "invalid lvalue in increment/decrement expression") +DIAG(err_typecheck_arithmetic_incomplete_type, ERROR, + "arithmetic on pointer to incomplete type '%0'") +DIAG(err_typecheck_decl_incomplete_type, ERROR, + "variable has incomplete type '%0'") +DIAG(err_typecheck_sclass_fscope, ERROR, + "illegal storage class on file-scoped variable") +DIAG(err_typecheck_sclass_func, ERROR, + "illegal storage class on function") +DIAG(err_typecheck_address_of_register, ERROR, + "address of register variable requested") +DIAG(err_typecheck_invalid_lvalue_addrof, ERROR, + "invalid lvalue in address expression") +DIAG(err_typecheck_unary_expr, ERROR, + "invalid argument type to unary expression '%0'") +DIAG(err_typecheck_indirection_requires_pointer, ERROR, + "indirection requires pointer operand ('%0' invalid)") +DIAG(err_typecheck_deref_incomplete_type, ERROR, + "dereferencing pointer to incomplete type '%0'") +DIAG(ext_typecheck_deref_ptr_to_void, EXTENSION, + "dereferencing '%0' pointer") +DIAG(err_typecheck_invalid_operands, ERROR, + "invalid operands to binary expression ('%0' and '%1')") +DIAG(ext_typecheck_comparison_of_pointer_integer, EXTENSION, + "comparison between pointer and integer") +DIAG(err_typecheck_assign_const, ERROR, + "read-only variable is not assignable") +DIAG(err_typecheck_assign_incompatible, ERROR, + "incompatible types assigning '%1' to '%0'") +DIAG(ext_typecheck_assign_pointer_int, EXTENSION, + "incompatible types assigning '%1' to '%0'") +DIAG(ext_typecheck_assign_incompatible_pointer, EXTENSION, + "incompatible pointer types assigning '%1' to '%0'") +DIAG(ext_typecheck_assign_discards_qualifiers, EXTENSION, + "assigning '%1' to '%0' discards qualifiers") +DIAG(err_typecheck_array_not_modifiable_lvalue, ERROR, + "array type '%0' is not assignable") +DIAG(err_typecheck_non_object_not_modifiable_lvalue, ERROR, + "non-object type '%0' is not assignable") 
+DIAG(err_typecheck_expression_not_modifiable_lvalue, ERROR, + "expression is not assignable") +DIAG(err_typecheck_incomplete_type_not_modifiable_lvalue, ERROR, + "incomplete type '%0' is not assignable") +DIAG(err_typecheck_call_not_function, ERROR, + "called object is not a function or function pointer") +DIAG(err_typecheck_call_too_few_args, ERROR, + "too few arguments to function") +DIAG(err_typecheck_call_too_many_args, ERROR, + "too many arguments to function") +DIAG(err_typecheck_passing_incompatible, ERROR, + "incompatible types passing '%0' to function expecting '%1'") +DIAG(ext_typecheck_passing_incompatible_pointer, EXTENSION, + "incompatible pointer types passing '%0' to function expecting '%1'") +DIAG(ext_typecheck_passing_pointer_int, EXTENSION, + "incompatible types passing '%1' to function expecting '%0'") +DIAG(ext_typecheck_passing_discards_qualifiers, EXTENSION, + "passing '%0' to '%1' discards qualifiers") +DIAG(err_typecheck_cond_expect_scalar, ERROR, + "used type '%0' where arithmetic or pointer type is required") +DIAG(err_typecheck_cond_incompatible_operands, ERROR, + "incompatible operand types ('%0' and '%1')") +DIAG(ext_typecheck_cond_incompatible_pointers, EXTENSION, + "pointer type mismatch ('%0' and '%1')") + +DIAG(warn_unused_expr, WARNING, + "expression result unused") + +// Statements. 
+DIAG(err_continue_not_in_loop, ERROR, + "'continue' statement not in loop statement") +DIAG(err_break_not_in_loop_or_switch, ERROR, + "'break' statement not in loop or switch statement") +DIAG(err_typecheck_return_incompatible, ERROR, + "incompatible type returning '%1', expected '%0'") +DIAG(ext_typecheck_return_pointer_int, EXTENSION, + "incompatible type returning '%1', expected '%0'") +DIAG(ext_typecheck_return_incompatible_pointer, EXTENSION, + "incompatible pointer type returning '%1', expected '%0'") +DIAG(ext_typecheck_return_discards_qualifiers, EXTENSION, + "returning '%1' from function expecting '%0' discards qualifiers") +DIAG(err_typecheck_statement_requires_scalar, ERROR, + "statement requires expression of scalar type ('%0' invalid)") + +DIAG(warn_return_missing_expr, WARNING, + "non-void function '%0' should return a value") +DIAG(ext_return_missing_expr, EXTENSION, + "non-void function '%0' should return a value") +DIAG(ext_return_has_expr, EXTENSION, + "void function '%0' should not return a value") + +#undef DIAG diff --git a/include/clang/Basic/FileManager.h b/include/clang/Basic/FileManager.h new file mode 100644 index 0000000000..ad955d7280 --- /dev/null +++ b/include/clang/Basic/FileManager.h @@ -0,0 +1,111 @@ +//===--- FileManager.h - File System Probing and Caching --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the FileManager interface. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_FILEMANAGER_H +#define LLVM_CLANG_FILEMANAGER_H + +#include "llvm/ADT/StringMap.h" +#include <map> +#include <string> +// FIXME: Enhance libsystem to support inode and other fields in stat. 
+#include <sys/types.h>
+
+namespace clang {
+class FileManager;
+
+/// DirectoryEntry - Cached information about one directory on the disk.
+/// The name is private; only the friend class FileManager can fill it in.
+class DirectoryEntry {
+  const char *Name;   // Name of the directory.
+  friend class FileManager;
+public:
+  DirectoryEntry() : Name(0) {}
+  const char *getName() const { return Name; }
+};
+
+/// FileEntry - Cached information about one file on the disk.
+/// Every field starts out zeroed; only the friend FileManager can set them.
+class FileEntry {
+  const char *Name;           // Name of the file.
+  off_t Size;                 // File size in bytes.
+  time_t ModTime;             // Modification time of file.
+  const DirectoryEntry *Dir;  // Directory file lives in.
+  unsigned UID;               // A unique (small) ID for the file.
+  friend class FileManager;
+public:
+  FileEntry() : Name(0), Size(0), ModTime(0), Dir(0), UID(0) {}
+
+  const char *getName() const { return Name; }
+  off_t getSize() const { return Size; }
+  unsigned getUID() const { return UID; }
+  time_t getModificationTime() const { return ModTime; }
+
+  /// getDir - Return the directory the file lives in.
+  /// This is null for a default-constructed entry.
+  const DirectoryEntry *getDir() const { return Dir; }
+};
+
+
+/// FileManager - Implements support for file system lookup, file system
+/// caching, and directory search management. This also handles more advanced
+/// properties, such as uniquing files based on "inode", so that a file with two
+/// names (e.g. symlinked) will be treated as a single file.
+///
+class FileManager {
+  /// UniqueDirs/UniqueFiles - Cache from ID's to existing directories/files.
+  /// The key is a (dev_t, ino_t) pair.
+  std::map<std::pair<dev_t, ino_t>, DirectoryEntry> UniqueDirs;
+  std::map<std::pair<dev_t, ino_t>, FileEntry> UniqueFiles;
+
+  /// DirEntries/FileEntries - This is a cache of directory/file entries we have
+  /// looked up. The actual Entry is owned by UniqueFiles/UniqueDirs above.
+  ///
+  llvm::StringMap<DirectoryEntry*> DirEntries;
+  llvm::StringMap<FileEntry*> FileEntries;
+
+  /// NextFileUID - Each FileEntry we create is assigned a unique ID #.
+  ///
+  unsigned NextFileUID;
+
+  // Statistics.
+  unsigned NumDirLookups, NumFileLookups;
+  unsigned NumDirCacheMisses, NumFileCacheMisses;
+public:
+  FileManager() : DirEntries(64), FileEntries(64), NextFileUID(0) {
+    NumDirLookups = NumFileLookups = 0;
+    NumDirCacheMisses = NumFileCacheMisses = 0;
+  }
+
+  /// getDirectory - Lookup, cache, and verify the specified directory. This
+  /// returns null if the directory doesn't exist.
+  /// The std::string form forwards to the [FileStart, FileEnd) overload.
+  const DirectoryEntry *getDirectory(const std::string &Filename) {
+    return getDirectory(&Filename[0], &Filename[0] + Filename.size());
+  }
+  const DirectoryEntry *getDirectory(const char *FileStart,const char *FileEnd);
+
+  /// getFile - Lookup, cache, and verify the specified file. This returns null
+  /// if the file doesn't exist.
+  /// The std::string form forwards to the [FilenameStart, FilenameEnd) overload.
+  const FileEntry *getFile(const std::string &Filename) {
+    return getFile(&Filename[0], &Filename[0] + Filename.size());
+  }
+  const FileEntry *getFile(const char *FilenameStart,
+                           const char *FilenameEnd);
+
+  void PrintStats() const;
+};
+
+} // end namespace clang
+
+#endif
diff --git a/include/clang/Basic/LangOptions.h b/include/clang/Basic/LangOptions.h
new file mode 100644
index 0000000000..37a5e73614
--- /dev/null
+++ b/include/clang/Basic/LangOptions.h
@@ -0,0 +1,44 @@
+//===--- LangOptions.h - C Language Family Language Options -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the LangOptions interface.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LANGOPTIONS_H
+#define LLVM_CLANG_LANGOPTIONS_H
+
+namespace clang {
+
+/// LangOptions - This class keeps track of the various options that can be
+/// enabled, which controls the dialect of C that is accepted.
+struct LangOptions {
+  unsigned Trigraphs         : 1;  // Trigraphs in source files.
+  unsigned BCPLComment       : 1;  // BCPL-style // comments.
+  unsigned DollarIdents      : 1;  // '$' allowed in identifiers.
+  unsigned Digraphs          : 1;  // When added to C? C99?
+  unsigned HexFloats         : 1;  // C99 Hexadecimal float constants.
+  unsigned C99               : 1;  // C99 Support
+  unsigned Microsoft         : 1;  // Microsoft extensions.
+  unsigned CPlusPlus         : 1;  // C++ Support
+  unsigned NoExtensions      : 1;  // All extensions are disabled, strict mode.
+  unsigned CXXOperatorNames  : 1;  // Treat C++ operator names as keywords.
+
+  unsigned ObjC1             : 1;  // Objective C 1 support enabled.
+  unsigned ObjC2             : 1;  // Objective C 2 support enabled.
+  // Every option bit, HexFloats included, starts out cleared (disabled).
+  LangOptions() {
+    Trigraphs = BCPLComment = DollarIdents = Digraphs = HexFloats = C99 = 0;
+    Microsoft = CPlusPlus = NoExtensions = CXXOperatorNames = ObjC1 = ObjC2 = 0;
+  }
+};
+
+} // end namespace clang
+
+#endif
diff --git a/include/clang/Basic/SourceLocation.h b/include/clang/Basic/SourceLocation.h
new file mode 100644
index 0000000000..1a90da9b09
--- /dev/null
+++ b/include/clang/Basic/SourceLocation.h
@@ -0,0 +1,108 @@
+//===--- SourceLocation.h - Compact identifier for Source Files -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the SourceLocation class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_SOURCELOCATION_H
+#define LLVM_CLANG_SOURCELOCATION_H
+
+namespace clang {
+
+/// SourceLocation - This is a carefully crafted 32-bit identifier that encodes
+/// a full include stack, line and column number information for a position in
+/// an input translation unit.
+class SourceLocation {
+  unsigned ID;
+public:
+  enum {
+    FileIDBits  = 14,
+    FilePosBits = 32 - FileIDBits
+  };
+
+  SourceLocation() : ID(0) {}  // An ID of 0 marks an invalid location.
+
+  /// SourceLocation constructor - Pack the given FileID and FilePos into a
+  /// single encoded ID.
+  SourceLocation(unsigned FileID, unsigned FilePos) {
+    // A FilePos too large for FilePosBits spills into consecutive FileIDs (the
+    // SourceManager makes one FileID per chunk): its high bits advance the
+    // FileID and its low bits remain as the offset within that chunk.
+    const unsigned FilePosMask = (1U << FilePosBits) - 1;
+    const unsigned ChunkFID = FileID + (FilePos >> FilePosBits);
+    const unsigned Offset = FilePos & FilePosMask;
+
+    // FIXME: Find a way to handle out of FileID bits! Maybe MaxFileID is an
+    // escape of some sort?
+    const unsigned MaxFID = (1U << FileIDBits) - 1;
+    const unsigned FID = ChunkFID < MaxFID ? ChunkFID : MaxFID;
+
+    ID = (FID << FilePosBits) | Offset;
+  }
+
+  /// isValid - True unless the encoded ID is zero. Invalid SourceLocations
+  /// are often used when events have no corresponding location in the source
+  /// (e.g. a diagnostic is required for a command line option).
+  ///
+  bool isValid() const { return 0 != ID; }
+
+  /// getFileID - Return the file identifier stored in the high bits of this
+  /// SourceLocation. This FileID can be used with the SourceManager object to
+  /// obtain an entire include stack for a file position reference.
+  unsigned getFileID() const { return ID >> FilePosBits; }
+
+  /// getRawFilePos - Return the low FilePosBits bits of the encoding: the byte
+  /// offset from the start of the file-chunk referred to by FileID. To get the
+  /// offset from the start of the file use SourceManager::getFilePos instead;
+  /// this method will be incorrect for large files.
+  unsigned getRawFilePos() const { return ID & ~(~0U << FilePosBits); }
+
+
+  /// getRawEncoding - When a SourceLocation itself cannot be used, this returns
+  /// an (opaque) 32-bit integer encoding for it. The value should only be
+  /// handed back to SourceLocation::getFromRawEncoding, it should not be
+  /// inspected directly.
+  unsigned getRawEncoding() const { return ID; }
+
+  /// getFromRawEncoding - Rebuild a SourceLocation from a value previously
+  /// produced by getRawEncoding.
+  static SourceLocation getFromRawEncoding(unsigned Encoding) {
+    SourceLocation Loc;
+    Loc.ID = Encoding;
+    return Loc;
+  }
+};
+
+inline bool operator==(const SourceLocation &LHS, const SourceLocation &RHS) {
+  return RHS.getRawEncoding() == LHS.getRawEncoding();
+}
+
+inline bool operator!=(const SourceLocation &LHS, const SourceLocation &RHS) {
+  return LHS.getRawEncoding() != RHS.getRawEncoding();
+}
+
+/// SourceRange - a trivial pair of locations used to represent a source range.
+class SourceRange {
+  SourceLocation B;
+  SourceLocation E;
+public:
+  SourceRange() {}  // Both ends default to the invalid location.
+  SourceRange(SourceLocation loc) : B(loc), E(loc) {}
+  SourceRange(SourceLocation begin, SourceLocation end) : B(begin), E(end) {}
+
+  SourceLocation Begin() const { return B; }
+  SourceLocation End() const { return E; }
+
+  bool isValid() const { return !(!B.isValid() || !E.isValid()); }
+};
+
+} // end namespace clang
+
+#endif
diff --git a/include/clang/Basic/SourceManager.h b/include/clang/Basic/SourceManager.h
new file mode 100644
index 0000000000..ef0ac0ba98
--- /dev/null
+++ b/include/clang/Basic/SourceManager.h
@@ -0,0 +1,341 @@
+//===--- SourceManager.h - Track and cache source files ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the SourceManager interface.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_SOURCEMANAGER_H
+#define LLVM_CLANG_SOURCEMANAGER_H
+
+#include "clang/Basic/SourceLocation.h"
+#include <vector>
+#include <map>
+#include <list>
+#include <string>
+#include <cassert>
+
+namespace llvm {
+class MemoryBuffer;
+}
+
+namespace clang {
+
+class SourceManager;
+class FileEntry;
+class IdentifierTokenInfo;
+
+/// SrcMgr - Private classes that are part of the SourceManager implementation.
+///
+namespace SrcMgr {
+  /// FileInfo - One instance of this struct is kept for every file loaded or
+  /// used. This object owns the MemoryBuffer object.
+  struct FileInfo {
+    /// Buffer - The actual buffer containing the characters from the input
+    /// file.
+    const llvm::MemoryBuffer *Buffer;
+
+    /// SourceLineCache - A new[]'d array of offsets for each source line. This
+    /// is lazily computed.
+    ///
+    unsigned *SourceLineCache;
+
+    /// NumLines - The number of lines in this FileInfo. This is only valid if
+    /// SourceLineCache is non-null.
+    unsigned NumLines;
+  };
+
+  typedef std::pair<const FileEntry * const, FileInfo> InfoRec;
+
+  /// FileIDInfo - Information about a FileID, basically just the logical file
+  /// that it represents and include stack information. A SourceLocation is a
+  /// byte offset from the start of this.
+  ///
+  /// FileID's are used to compute the location of a character in memory as well
+  /// as the logical source location, which can differ from the physical
+  /// location. It is different when #line's are active or when macros have
+  /// been expanded.
+  ///
+  /// Each FileID has include stack information, indicating where it came from.
+  /// For the primary translation unit, it comes from SourceLocation() aka 0.
+  ///
+  /// There are two types of FileID's:
+  ///   1. Normal MemoryBuffer (file). These are represented by a "InfoRec *",
+  ///      describing the source file, and a Chunk number, which factors into
+  ///      the SourceLocation's offset from the start of the buffer.
+  ///   2. Macro Expansions. These indicate that the logical location is
+  ///      totally different than the physical location. The logical source
+  ///      location is specified by the IncludeLoc. The physical location is
+  ///      the FilePos of the token's SourceLocation combined with the FileID
+  ///      from MacroTokenFileID.
+  ///
+  struct FileIDInfo {
+    enum FileIDType {
+      NormalBuffer,
+      MacroExpansion
+    };
+
+    /// The type of this FileID.
+    FileIDType IDType;
+
+    /// IncludeLoc - The location of the #include that brought in this file.
+    /// This SourceLocation object has a FileId of 0 for the main file.
+    SourceLocation IncludeLoc;
+
+    /// This union is discriminated by IDType.
+    ///
+    union {
+      struct NormalBufferInfo {
+        /// ChunkNo - Really large buffers are broken up into chunks that are
+        /// each (1 << SourceLocation::FilePosBits) in size. This specifies the
+        /// chunk number of this FileID.
+        unsigned ChunkNo;
+
+        /// Info - Information about the source buffer itself.
+        ///
+        const InfoRec *Info;
+      } NormalBuffer;
+
+      /// MacroTokenFileID - This is the File ID that contains the characters
+      /// that make up the expanded token.
+      unsigned MacroTokenFileID;
+    } u;
+
+    /// getNormalBuffer - Return a FileIDInfo object for a normal buffer
+    /// reference.
+    static FileIDInfo getNormalBuffer(SourceLocation IL, unsigned CN,
+                                      const InfoRec *Inf) {
+      FileIDInfo X;
+      X.IDType = NormalBuffer;
+      X.IncludeLoc = IL;
+      X.u.NormalBuffer.ChunkNo = CN;
+      X.u.NormalBuffer.Info = Inf;
+      return X;
+    }
+
+    /// getMacroExpansion - Return a FileID for a macro expansion. IL specifies
+    /// the instantiation location, and MacroFID specifies the FileID that the
+    /// token's characters come from.
+    static FileIDInfo getMacroExpansion(SourceLocation IL,
+                                        unsigned MacroFID) {
+      FileIDInfo X;
+      X.IDType = MacroExpansion;
+      X.IncludeLoc = IL;
+      X.u.MacroTokenFileID = MacroFID;
+      return X;
+    }
+
+    unsigned getNormalBufferChunkNo() const {
+      assert(IDType == NormalBuffer && "Not a normal buffer!");
+      return u.NormalBuffer.ChunkNo;
+    }
+
+    const InfoRec *getNormalBufferInfo() const {
+      assert(IDType == NormalBuffer && "Not a normal buffer!");
+      return u.NormalBuffer.Info;
+    }
+  };
+} // end SrcMgr namespace.
+
+/// SourceManager - This class handles loading and caching of source files into
+/// memory. This object owns the MemoryBuffer objects for all of the loaded
+/// files and assigns unique FileID's for each unique #include chain.
+///
+/// The SourceManager can be queried for information about SourceLocation
+/// objects, turning them into either physical or logical locations. Physical
+/// locations represent where the bytes corresponding to a token came from and
+/// logical locations represent where the location is in the user's view. In
+/// the case of a macro expansion, for example, the physical location indicates
+/// where the expanded token came from and the logical location specifies where
+/// it was expanded. Logical locations are also influenced by #line directives,
+/// etc.
+class SourceManager {
+  /// FileInfos - Memoized information about all of the files tracked by this
+  /// SourceManager.
+  std::map<const FileEntry *, SrcMgr::FileInfo> FileInfos;
+
+  /// MemBufferInfos - Information about various memory buffers that we have
+  /// read in. This is a list, instead of a vector, because we need pointers to
+  /// the FileInfo objects to be stable.
+  std::list<SrcMgr::InfoRec> MemBufferInfos;
+
+  /// FileIDs - Information about each FileID. FileID #0 is not valid, so all
+  /// entries are off by one.
+  std::vector<SrcMgr::FileIDInfo> FileIDs;
+
+  /// LastInstantiationLoc_* - Cache the last instantiation request for fast
+  /// lookup. Macros often want many tokens instantiated at the same location.
+  SourceLocation LastInstantiationLoc_InstantLoc;
+  unsigned LastInstantiationLoc_MacroFID;
+  unsigned LastInstantiationLoc_Result;
+public:
+  SourceManager() { LastInstantiationLoc_MacroFID = ~0U; }
+  ~SourceManager();
+
+  /// createFileID - Create a new FileID that represents the specified file
+  /// being #included from the specified IncludePosition. This returns 0 on
+  /// error and translates NULL into standard input.
+  unsigned createFileID(const FileEntry *SourceFile, SourceLocation IncludePos){
+    const SrcMgr::InfoRec *IR = getInfoRec(SourceFile);
+    if (IR == 0) return 0;    // Error opening file?
+    return createFileID(IR, IncludePos);
+  }
+
+  /// createFileIDForMemBuffer - Create a new FileID that represents the
+  /// specified memory buffer. This does no caching of the buffer and takes
+  /// ownership of the MemoryBuffer, so only pass a MemoryBuffer to this once.
+  unsigned createFileIDForMemBuffer(const llvm::MemoryBuffer *Buffer) {
+    return createFileID(createMemBufferInfoRec(Buffer), SourceLocation());
+  }
+
+  /// getInstantiationLoc - Return a new SourceLocation that encodes the fact
+  /// that a token from physloc PhysLoc should actually be referenced from
+  /// InstantiationLoc.
+  SourceLocation getInstantiationLoc(SourceLocation PhysLoc,
+                                     SourceLocation InstantiationLoc);
+
+  /// getBuffer - Return the buffer for the specified FileID.
+  ///
+  const llvm::MemoryBuffer *getBuffer(unsigned FileID) const {
+    return getFileInfo(FileID)->Buffer;
+  }
+
+  /// getIncludeLoc - Return the location of the #include for the specified
+  /// FileID.
+  SourceLocation getIncludeLoc(unsigned FileID) const;
+
+  /// getFilePos - This (efficient) method returns the offset from the start of
+  /// the file that the specified SourceLocation represents. This returns the
+  /// location of the physical character data, not the logical file position.
+  unsigned getFilePos(SourceLocation Loc) const {
+    const SrcMgr::FileIDInfo *FIDInfo = getFIDInfo(Loc.getFileID());
+
+    // For Macros, the physical loc is specified by the MacroTokenFileID.
+    if (FIDInfo->IDType == SrcMgr::FileIDInfo::MacroExpansion)
+      FIDInfo = getFIDInfo(FIDInfo->u.MacroTokenFileID);
+
+    // If this file has been split up into chunks, factor in the chunk number
+    // that the FileID references.
+    unsigned ChunkNo = FIDInfo->getNormalBufferChunkNo();
+    return Loc.getRawFilePos() + (ChunkNo << SourceLocation::FilePosBits);
+  }
+
+  /// getCharacterData - Return a pointer to the start of the specified location
+  /// in the appropriate MemoryBuffer.
+  const char *getCharacterData(SourceLocation SL) const;
+
+  /// getColumnNumber - Return the column # for the specified location.
+  /// This is significantly cheaper to compute than the line number. This
+  /// returns zero if the column number isn't known.
+  unsigned getColumnNumber(SourceLocation Loc) const;
+
+  /// getLineNumber - Given a SourceLocation, return the physical line number
+  /// for the position indicated. This requires building and caching a table of
+  /// line offsets for the MemoryBuffer, so this is not cheap: use only when
+  /// about to emit a diagnostic.
+  unsigned getLineNumber(SourceLocation Loc);
+
+  /// getSourceFilePos - This method returns the *logical* offset from the start
+  /// of the file that the specified SourceLocation represents. This returns
+  /// the location of the *logical* character data, not the physical file
+  /// position. In the case of macros, for example, this returns where the
+  /// macro was instantiated, not where the characters for the macro can be
+  /// found.
+  unsigned getSourceFilePos(SourceLocation Loc) const;
+
+  /// getSourceName - This method returns the name of the file or buffer that
+  /// the SourceLocation specifies. This can be modified with #line directives,
+  /// etc.
+  std::string getSourceName(SourceLocation Loc);
+
+  /// getFileEntryForFileID - Return the FileEntry record for the specified
+  /// FileID if one exists.
+  const FileEntry *getFileEntryForFileID(unsigned FileID) const {
+    assert(FileID-1 < FileIDs.size() && "Invalid FileID!");
+    return FileIDs[FileID-1].getNormalBufferInfo()->first;
+  }
+
+  /// Given a SourceLocation object, return the logical location referenced by
+  /// the ID. This logical location is subject to #line directives, etc.
+  SourceLocation getLogicalLoc(SourceLocation Loc) const {
+    if (Loc.getFileID() == 0) return Loc;
+
+    const SrcMgr::FileIDInfo *FIDInfo = getFIDInfo(Loc.getFileID());
+    if (FIDInfo->IDType == SrcMgr::FileIDInfo::MacroExpansion)
+      return FIDInfo->IncludeLoc;
+    return Loc;
+  }
+
+  /// getPhysicalLoc - Given a SourceLocation object, return the physical
+  /// location referenced by the ID.
+  SourceLocation getPhysicalLoc(SourceLocation Loc) const {
+    if (Loc.getFileID() == 0) return Loc;
+
+    // For Macros, the physical loc is specified by the MacroTokenFileID.
+    const SrcMgr::FileIDInfo *FIDInfo = getFIDInfo(Loc.getFileID());
+    if (FIDInfo->IDType == SrcMgr::FileIDInfo::MacroExpansion)
+      return SourceLocation(FIDInfo->u.MacroTokenFileID,
+                            Loc.getRawFilePos());
+    return Loc;
+  }
+
+  /// PrintStats - Print statistics to stderr.
+  ///
+  void PrintStats() const;
+private:
+  /// createFileID - Create a new fileID for the specified InfoRec and include
+  /// position. This works regardless of whether the InfoRec corresponds to a
+  /// file or some other input source.
+  unsigned createFileID(const SrcMgr::InfoRec *File, SourceLocation IncludePos);
+
+  /// getInfoRec - Create or return a cached InfoRec for the specified file.
+  /// This returns null on failure.
+  const SrcMgr::InfoRec *getInfoRec(const FileEntry *SourceFile);
+
+  /// createMemBufferInfoRec - Create a new info record for the specified memory
+  /// buffer. This does no caching.
+  const SrcMgr::InfoRec *createMemBufferInfoRec(const llvm::MemoryBuffer *Buf);
+  /// getFIDInfo - Return the FileIDs entry for a valid (1-based) FileID.
+  const SrcMgr::FileIDInfo *getFIDInfo(unsigned FileID) const {
+    assert(FileID-1 < FileIDs.size() && "Invalid FileID!");
+    return &FileIDs[FileID-1];
+  }
+
+  /// Return the InfoRec structure for the specified FileID. This is always the
+  /// physical reference for the ID.
+  const SrcMgr::InfoRec *getInfoRec(const SrcMgr::FileIDInfo *FIDInfo) const {
+    // For Macros, the physical loc is specified by the MacroTokenFileID.
+    if (FIDInfo->IDType == SrcMgr::FileIDInfo::MacroExpansion)
+      FIDInfo = getFIDInfo(FIDInfo->u.MacroTokenFileID);
+    return FIDInfo->getNormalBufferInfo();
+  }
+  const SrcMgr::InfoRec *getInfoRec(unsigned FileID) const {
+    return getInfoRec(getFIDInfo(FileID));
+  }
+
+  SrcMgr::FileInfo *getFileInfo(const SrcMgr::FileIDInfo *FIDInfo) const {
+    if (const SrcMgr::InfoRec *IR = getInfoRec(FIDInfo))
+      return const_cast<SrcMgr::FileInfo *>(&IR->second);
+    return 0;
+  }
+  SrcMgr::FileInfo *getFileInfo(unsigned FileID) const {
+    if (const SrcMgr::InfoRec *IR = getInfoRec(FileID))
+      return const_cast<SrcMgr::FileInfo *>(&IR->second);
+    return 0;
+  }
+  SrcMgr::FileInfo *getFileInfo(const FileEntry *SourceFile) {
+    if (const SrcMgr::InfoRec *IR = getInfoRec(SourceFile))
+      return const_cast<SrcMgr::FileInfo *>(&IR->second);
+    return 0;
+  }
+};
+
+} // end namespace clang
+
+#endif
diff --git a/include/clang/Basic/TargetInfo.h b/include/clang/Basic/TargetInfo.h
new file mode 100644
index 0000000000..17fa86bdc8
--- /dev/null
+++ b/include/clang/Basic/TargetInfo.h
@@ -0,0 +1,208 @@
+//===--- TargetInfo.h - Expose information about the target -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by Chris Lattner and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This file defines the TargetInfo and TargetInfoImpl interfaces. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_BASIC_TARGETINFO_H +#define LLVM_CLANG_BASIC_TARGETINFO_H + +#include "clang/Basic/SourceLocation.h" +#include <vector> +#include <string> + +namespace clang { + +class TargetInfoImpl; +class Diagnostic; +namespace Builtin { struct Info; } + +/// TargetInfo - This class exposes information about the current target set. +/// A target set consists of a primary target and zero or more secondary targets +/// which are each represented by a TargetInfoImpl object. TargetInfo responds +/// to various queries as though it were the primary target, but keeps track of, +/// and warns about, the first query made of it that is contradictory among the +/// targets it tracks. For example, if it contains a "PPC32" and "PPC64" +/// target, it will warn the first time the size of the 'long' datatype is +/// queried. +/// +/// Note that TargetInfo does not take ownership of the various targets or the +/// diagnostic info, but does expect them to be alive for as long as it is. +/// +class TargetInfo { + /// PrimaryTarget - This tracks the primary target in the target set. + /// + const TargetInfoImpl *PrimaryTarget; + + /// SecondaryTargets - This tracks the set of secondary targets. + /// + std::vector<const TargetInfoImpl*> SecondaryTargets; + + /// Diag - If non-null, this object is used to report the first use of + /// non-portable functionality in the translation unit. + /// + Diagnostic *Diag; + + /// NonPortable - This instance variable keeps track of whether or not the + /// current translation unit is portable across the set of targets tracked. + bool NonPortable; + + /// These are all caches for target values.
+ unsigned WCharWidth; + +public: + TargetInfo(const TargetInfoImpl *Primary, Diagnostic *D = 0) { + PrimaryTarget = Primary; + Diag = D; + NonPortable = false; + + // Initialize Cache values to uncomputed. + WCharWidth = 0; + } + + /// isNonPortable - Return true if the current translation unit has used a + /// target property that is non-portable across the secondary targets. + bool isNonPortable() const { + return NonPortable; + } + + /// isPortable - Return true if this translation unit is portable across the + /// secondary targets so far. + bool isPortable() const { + return !NonPortable; + } + + /// AddSecondaryTarget - Add a secondary target to the target set. + void AddSecondaryTarget(const TargetInfoImpl *Secondary) { + SecondaryTargets.push_back(Secondary); + } + + ///===---- Target property query methods --------------------------------===// + + /// DiagnoseNonPortability - Emit a diagnostic indicating that the current + /// translation unit is non-portable due to a construct at the specified + /// location. DiagKind indicates what went wrong. + void DiagnoseNonPortability(SourceLocation Loc, unsigned DiagKind); + + /// getTargetDefines - Appends the target-specific #define values for this + /// target set to the specified buffer. + void getTargetDefines(std::vector<char> &DefineBuffer); + + /// isCharSigned - Return true if 'char' is 'signed char' or false if it is + /// treated as 'unsigned char'. This is implementation defined according to + /// C99 6.2.5p15. In our implementation, this is target-specific. + bool isCharSigned(SourceLocation Loc) { + // FIXME: implement correctly. + return true; + } + + /// getPointerWidth - Return the width of pointers on this target, we + /// currently assume one pointer type. + unsigned getPointerWidth(SourceLocation Loc) { + return 32; // FIXME: implement correctly. + } + + /// getBoolWidth - Return the size of '_Bool' and C++ 'bool' for this target, + /// in bits. 
+ unsigned getBoolWidth(SourceLocation Loc) { + return 8; // FIXME: implement correctly: wrong for ppc32. + } + + /// getCharWidth - Return the size of 'char', 'signed char' and + /// 'unsigned char' for this target, in bits. + unsigned getCharWidth(SourceLocation Loc) { + return 8; // FIXME: implement correctly. + } + + /// getShortWidth - Return the size of 'signed short' and 'unsigned short' for + /// this target, in bits. + unsigned getShortWidth(SourceLocation Loc) { + return 16; // FIXME: implement correctly. + } + + /// getIntWidth - Return the size of 'signed int' and 'unsigned int' for this + /// target, in bits. + unsigned getIntWidth(SourceLocation Loc) { + return 32; // FIXME: implement correctly. + } + + /// getLongWidth - Return the size of 'signed long' and 'unsigned long' for + /// this target, in bits. + unsigned getLongWidth(SourceLocation Loc) { + return 32; // FIXME: implement correctly: wrong for ppc64/x86-64 + } + + /// getLongLongWidth - Return the size of 'signed long long' and + /// 'unsigned long long' for this target, in bits. + unsigned getLongLongWidth(SourceLocation Loc) { + return 64; // FIXME: implement correctly. + } + + /// getWCharWidth - Return the size of wchar_t in bits. + /// + unsigned getWCharWidth(SourceLocation Loc) { + if (!WCharWidth) ComputeWCharWidth(Loc); + return WCharWidth; + } + + /// getIntMaxTWidth - Return the size of intmax_t and uintmax_t for this + /// target, in bits. + unsigned getIntMaxTWidth(SourceLocation Loc) { + // FIXME: implement correctly. + return 64; + } + + /// getTargetBuiltins - Return information about target-specific builtins for + /// the current primary target, and info about which builtins are non-portable + /// across the current set of primary and secondary targets. 
+ void getTargetBuiltins(const Builtin::Info *&Records, unsigned &NumRecords, + std::vector<const char *> &NonPortableBuiltins) const; + +private: + void ComputeWCharWidth(SourceLocation Loc); +}; + + + + +/// TargetInfoImpl - This class is implemented for specific targets and is used +/// by the TargetInfo class. Target implementations should initialize instance +/// variables and implement various virtual methods if the default values are +/// not appropriate for the target. +class TargetInfoImpl { +protected: + unsigned WCharWidth; /// sizeof(wchar_t) in bits. Default value is 32. +public: + TargetInfoImpl() : WCharWidth(32) {} + virtual ~TargetInfoImpl() {} + + /// getTargetDefines - Return a list of the target-specific #define values set + /// when compiling to this target. Each string should be of the form "X", + /// which results in '#define X 1' or "X=Y" which results in "#define X Y" + virtual void getTargetDefines(std::vector<std::string> &Defines) const = 0; + + /// getWCharWidth - Return the size of wchar_t in bits. + /// + unsigned getWCharWidth() const { return WCharWidth; } + + /// getTargetBuiltins - Return information about target-specific builtins for + /// the target. + virtual void getTargetBuiltins(const Builtin::Info *&Records, + unsigned &NumRecords) const { + Records = 0; + NumRecords = 0; + } +private: + virtual void ANCHOR(); // out-of-line virtual method for class. +}; + +} // end namespace clang + +#endif diff --git a/include/clang/Basic/TokenKinds.def b/include/clang/Basic/TokenKinds.def new file mode 100644 index 0000000000..4fab08dfde --- /dev/null +++ b/include/clang/Basic/TokenKinds.def @@ -0,0 +1,354 @@ +//===--- TokenKinds.def - C Family Token Kind Database ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines the TokenKind database. This includes normal tokens like +// tok::ampamp (corresponding to the && token) as well as keywords for various +// languages. Users may #define TOK, KEYWORD, ALIAS, PPKEYWORD, +// CXX_KEYWORD_OPERATOR, OBJC1_AT_KEYWORD and/or OBJC2_AT_KEYWORD to use it. +// +//===----------------------------------------------------------------------===// + +#ifndef TOK +#define TOK(X) +#endif +#ifndef KEYWORD +#define KEYWORD(X,Y) TOK(kw_ ## X) +#endif +#ifndef ALIAS +#define ALIAS(X,Y) +#endif +#ifndef PPKEYWORD +#define PPKEYWORD(X) +#endif +#ifndef CXX_KEYWORD_OPERATOR +#define CXX_KEYWORD_OPERATOR(X,Y) +#endif +#ifndef OBJC1_AT_KEYWORD +#define OBJC1_AT_KEYWORD(X) +#endif +#ifndef OBJC2_AT_KEYWORD +#define OBJC2_AT_KEYWORD(X) +#endif + +//===----------------------------------------------------------------------===// +// Preprocessor keywords. +//===----------------------------------------------------------------------===// + +// These have meaning after a '#' at the start of a line. These define enums in +// the tok::pp_* namespace. +PPKEYWORD(not_keyword) + +// C99 6.10.1 - Conditional Inclusion. +PPKEYWORD(if) +PPKEYWORD(ifdef) +PPKEYWORD(ifndef) +PPKEYWORD(elif) +PPKEYWORD(else) +PPKEYWORD(endif) +PPKEYWORD(defined) + +// C99 6.10.2 - Source File Inclusion. +PPKEYWORD(include) + +// C99 6.10.3 - Macro Replacement. +PPKEYWORD(define) +PPKEYWORD(undef) + +// C99 6.10.4 - Line Control. +PPKEYWORD(line) + +// C99 6.10.5 - Error Directive. +PPKEYWORD(error) + +// C99 6.10.6 - Pragma Directive. +PPKEYWORD(pragma) + +// GNU Extensions. +PPKEYWORD(import) +PPKEYWORD(include_next) +PPKEYWORD(warning) +PPKEYWORD(ident) +PPKEYWORD(sccs) +PPKEYWORD(assert) +PPKEYWORD(unassert) + +// clang extensions.
+PPKEYWORD(define_target) +PPKEYWORD(define_other_target) + + +//===----------------------------------------------------------------------===// +// Language keywords. +//===----------------------------------------------------------------------===// + +// These define members of the tok::kw_* namespace. + +TOK(unknown) // Not a token. +TOK(eof) // End of file. +TOK(eom) // End of macro (end of line inside a macro). + +// C99 6.4.9: Comments. +TOK(comment) // Comment (only in -E -C[C] mode) + +// C99 6.4.2: Identifiers. +TOK(identifier) // abcde123 + +// C99 6.4.4.1: Integer Constants +// C99 6.4.4.2: Floating Constants +TOK(numeric_constant) // 0x123 + +// C99 6.4.4: Character Constants +TOK(char_constant) // 'a' L'b' + +// C99 6.4.5: String Literals. +TOK(string_literal) // "foo" +TOK(wide_string_literal) // L"foo" +TOK(angle_string_literal)// <foo> + +// C99 6.4.6: Punctuators. +TOK(l_square) // [ +TOK(r_square) // ] +TOK(l_paren) // ( +TOK(r_paren) // ) +TOK(l_brace) // { +TOK(r_brace) // } +TOK(period) // . +TOK(ellipsis) // ... +TOK(amp) // & +TOK(ampamp) // && +TOK(ampequal) // &= +TOK(star) // * +TOK(starequal) // *= +TOK(plus) // + +TOK(plusplus) // ++ +TOK(plusequal) // += +TOK(minus) // - +TOK(arrow) // -> +TOK(minusminus) // -- +TOK(minusequal) // -= +TOK(tilde) // ~ +TOK(exclaim) // ! +TOK(exclaimequal) // != +TOK(slash) // / +TOK(slashequal) // /= +TOK(percent) // % +TOK(percentequal) // %= +TOK(less) // < +TOK(lessless) // << +TOK(lessequal) // <= +TOK(lesslessequal) // <<= +TOK(greater) // > +TOK(greatergreater) // >> +TOK(greaterequal) // >= +TOK(greatergreaterequal) // >>= +TOK(caret) // ^ +TOK(caretequal) // ^= +TOK(pipe) // | +TOK(pipepipe) // || +TOK(pipeequal) // |= +TOK(question) // ? +TOK(colon) // : +TOK(semi) // ; +TOK(equal) // = +TOK(equalequal) // == +TOK(comma) // , +TOK(hash) // # +TOK(hashhash) // ## +TOK(hashat) // #@ + +// C++ Support +TOK(periodstar) // .* +TOK(arrowstar) // ->* +TOK(coloncolon) // :: + +// Objective C support. 
+TOK(at) // @ + +// at_identifier // @foo +// at_string // @"foo" + + +// C99 6.4.1: Keywords. These turn into kw_* tokens. +// Flags allowed: +// NOTC90 - In C90, this token is never available. +// EXTC90 - In C90, this token is an extension that is enabled unless strict. +// NOTC99 - In C99, this token is never available. +// EXTC99 - In C99, this token is an extension that is enabled unless strict. +// NOTCPP - In C++, this token is never available. +// EXTCPP - In C++, this token is an extension that is enabled unless strict. +// +KEYWORD(auto , 0) +KEYWORD(break , 0) +KEYWORD(case , 0) +KEYWORD(char , 0) +KEYWORD(const , 0) +KEYWORD(continue , 0) +KEYWORD(default , 0) +KEYWORD(do , 0) +KEYWORD(double , 0) +KEYWORD(else , 0) +KEYWORD(enum , 0) +KEYWORD(extern , 0) +KEYWORD(float , 0) +KEYWORD(for , 0) +KEYWORD(goto , 0) +KEYWORD(if , 0) +KEYWORD(inline , EXTC90) // Ext in C90, ok in C99/C++ +KEYWORD(int , 0) +KEYWORD(long , 0) +KEYWORD(register , 0) +KEYWORD(restrict , EXTC90) // Ext in C90 +KEYWORD(return , 0) +KEYWORD(short , 0) +KEYWORD(signed , 0) +KEYWORD(sizeof , 0) +KEYWORD(static , 0) +KEYWORD(struct , 0) +KEYWORD(switch , 0) +KEYWORD(typedef , 0) +KEYWORD(union , 0) +KEYWORD(unsigned , 0) +KEYWORD(void , 0) +KEYWORD(volatile , 0) +KEYWORD(while , 0) +KEYWORD(_Bool , EXTC90|EXTCPP) // C99 only +KEYWORD(_Complex , EXTC90) // C99/C++ +KEYWORD(_Imaginary , EXTC90|NOTCPP) // C99 only + +// Special tokens to the compiler. +KEYWORD(__func__ , EXTC90|EXTCPP) // Only in C99. +KEYWORD(__FUNCTION__ , EXTC90|EXTC99|EXTCPP) // GCC Extension. +KEYWORD(__PRETTY_FUNCTION__ , EXTC90|EXTC99|EXTCPP) // GCC Extension. + +// C++ 2.11p1: Keywords.
+KEYWORD(asm , EXTC90|EXTC99) // Exts in C90/C99 +KEYWORD(bool , NOTC90|NOTC99) +KEYWORD(catch , NOTC90|NOTC99) +KEYWORD(class , NOTC90|NOTC99) +KEYWORD(const_cast , NOTC90|NOTC99) +KEYWORD(delete , NOTC90|NOTC99) +KEYWORD(dynamic_cast , NOTC90|NOTC99) +KEYWORD(explicit , NOTC90|NOTC99) +KEYWORD(export , NOTC90|NOTC99) +KEYWORD(false , NOTC90|NOTC99) +KEYWORD(friend , NOTC90|NOTC99) +KEYWORD(mutable , NOTC90|NOTC99) +KEYWORD(namespace , NOTC90|NOTC99) +KEYWORD(new , NOTC90|NOTC99) +KEYWORD(operator , NOTC90|NOTC99) +KEYWORD(private , NOTC90|NOTC99) +KEYWORD(protected , NOTC90|NOTC99) +KEYWORD(public , NOTC90|NOTC99) +KEYWORD(reinterpret_cast , NOTC90|NOTC99) +KEYWORD(static_cast , NOTC90|NOTC99) +KEYWORD(template , NOTC90|NOTC99) +KEYWORD(this , NOTC90|NOTC99) +KEYWORD(throw , NOTC90|NOTC99) +KEYWORD(true , NOTC90|NOTC99) +KEYWORD(try , NOTC90|NOTC99) +KEYWORD(typename , NOTC90|NOTC99) +KEYWORD(typeid , NOTC90|NOTC99) +KEYWORD(using , NOTC90|NOTC99) +KEYWORD(virtual , NOTC90|NOTC99) +KEYWORD(wchar_t , NOTC90|NOTC99) + +// C++ 2.5p2: Alternative Representations. +CXX_KEYWORD_OPERATOR(and , ampamp) +CXX_KEYWORD_OPERATOR(and_eq , ampequal) +CXX_KEYWORD_OPERATOR(bitand , amp) +CXX_KEYWORD_OPERATOR(bitor , pipe) +CXX_KEYWORD_OPERATOR(compl , tilde) +CXX_KEYWORD_OPERATOR(not , exclaim) +CXX_KEYWORD_OPERATOR(not_eq , exclaimequal) +CXX_KEYWORD_OPERATOR(or , pipepipe) +CXX_KEYWORD_OPERATOR(or_eq , pipeequal) +CXX_KEYWORD_OPERATOR(xor , caret) +CXX_KEYWORD_OPERATOR(xor_eq , caretequal) + +// GNU Extensions. 
+KEYWORD(_Decimal32 , EXTC90|EXTC99|EXTCPP) +KEYWORD(_Decimal64 , EXTC90|EXTC99|EXTCPP) +KEYWORD(_Decimal128 , EXTC90|EXTC99|EXTCPP) +KEYWORD(typeof , EXTC90|EXTC99|EXTCPP) +KEYWORD(__null , NOTC90|NOTC99|EXTCPP) // C++-only Extensn +KEYWORD(__alignof , EXTC90|EXTC99|EXTCPP) +KEYWORD(__attribute , EXTC90|EXTC99|EXTCPP) +KEYWORD(__builtin_choose_expr , EXTC90|EXTC99|EXTCPP) +KEYWORD(__builtin_offsetof , EXTC90|EXTC99|EXTCPP) +KEYWORD(__builtin_types_compatible_p, EXTC90|EXTC99|EXTCPP) +KEYWORD(__builtin_va_arg , EXTC90|EXTC99|EXTCPP) +KEYWORD(__extension__ , 0) // Not treated as an extension! +KEYWORD(__imag , EXTC90|EXTC99|EXTCPP) +KEYWORD(__label__ , EXTC90|EXTC99|EXTCPP) +KEYWORD(__real , EXTC90|EXTC99|EXTCPP) +KEYWORD(__thread , EXTC90|EXTC99|EXTCPP) + +// Alternate spelling for various tokens. These are GCC extensions in all +// languages, but should not be disabled in strict conformance mode. +ALIAS("__attribute__", __attribute) +ALIAS("__const" , const ) +ALIAS("__const__" , const ) +ALIAS("__alignof__" , __alignof ) +ALIAS("__asm" , asm ) +ALIAS("__asm__" , asm ) +ALIAS("__complex" , _Complex ) +ALIAS("__complex__" , _Complex ) +ALIAS("__imag__" , __imag ) +ALIAS("__inline" , inline ) +ALIAS("__inline__" , inline ) +ALIAS("__real__" , __real ) +ALIAS("__restrict" , restrict ) +ALIAS("__restrict__" , restrict ) +ALIAS("__signed" , signed ) +ALIAS("__signed__" , signed ) +ALIAS("__typeof" , typeof ) +ALIAS("__typeof__" , typeof ) +ALIAS("__volatile" , volatile ) +ALIAS("__volatile__" , volatile ) + + +//===----------------------------------------------------------------------===// +// Objective-C @-preceded keywords. +//===----------------------------------------------------------------------===// + +// These have meaning after an '@' in Objective-C mode. These define enums in +// the tok::objc_* namespace.
+ +OBJC1_AT_KEYWORD(not_keyword) +OBJC1_AT_KEYWORD(class) +OBJC1_AT_KEYWORD(compatibility_alias) +OBJC1_AT_KEYWORD(defs) +OBJC1_AT_KEYWORD(encode) +OBJC1_AT_KEYWORD(end) +OBJC1_AT_KEYWORD(implementation) +OBJC1_AT_KEYWORD(interface) +OBJC1_AT_KEYWORD(private) +OBJC1_AT_KEYWORD(protected) +OBJC1_AT_KEYWORD(protocol) +OBJC1_AT_KEYWORD(public) +OBJC1_AT_KEYWORD(selector) +OBJC1_AT_KEYWORD(throw) +OBJC1_AT_KEYWORD(try) +OBJC1_AT_KEYWORD(catch) +OBJC1_AT_KEYWORD(finally) +OBJC1_AT_KEYWORD(synchronized) + +// I'm guessing this is an objc2 keyword, what are the others? +OBJC2_AT_KEYWORD(property) + +// TODO: What to do about context-sensitive keywords like: +// bycopy/byref/in/inout/oneway/out? + +#undef OBJC2_AT_KEYWORD +#undef OBJC1_AT_KEYWORD +#undef CXX_KEYWORD_OPERATOR +#undef PPKEYWORD +#undef ALIAS +#undef KEYWORD +#undef TOK diff --git a/include/clang/Basic/TokenKinds.h b/include/clang/Basic/TokenKinds.h new file mode 100644 index 0000000000..e51f0c7f8a --- /dev/null +++ b/include/clang/Basic/TokenKinds.h @@ -0,0 +1,51 @@ +//===--- TokenKinds.h - Enum values for C Token Kinds -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the TokenKind enum and support functions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOKENKINDS_H +#define LLVM_CLANG_TOKENKINDS_H + +namespace clang { + +namespace tok { + +/// TokenKind - This provides a simple uniform namespace for tokens from all C +/// languages. +enum TokenKind { +#define TOK(X) X, +#include "clang/Basic/TokenKinds.def" + NUM_TOKENS +}; + +/// PPKeywordKind - This provides a namespace for preprocessor keywords which +/// start with a '#' at the beginning of the line. 
+enum PPKeywordKind { +#define PPKEYWORD(X) pp_##X, +#include "clang/Basic/TokenKinds.def" + NUM_PP_KEYWORDS +}; + +/// ObjCKeywordKind - This provides a namespace for Objective-C keywords which +/// start with an '@'. +enum ObjCKeywordKind { +#define OBJC1_AT_KEYWORD(X) objc_##X, +#define OBJC2_AT_KEYWORD(X) objc_##X, +#include "clang/Basic/TokenKinds.def" + NUM_OBJC_KEYWORDS +}; + +const char *getTokenName(enum TokenKind Kind); + +} // end namespace tok +} // end namespace clang + +#endif diff --git a/include/clang/CodeGen/ModuleBuilder.h b/include/clang/CodeGen/ModuleBuilder.h new file mode 100644 index 0000000000..e9cb2b5bd4 --- /dev/null +++ b/include/clang/CodeGen/ModuleBuilder.h @@ -0,0 +1,47 @@ +//===--- CodeGen/ModuleBuilder.h - Build LLVM from ASTs ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the ModuleBuilder interface. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_CODEGEN_MODULEBUILDER_H +#define LLVM_CLANG_CODEGEN_MODULEBUILDER_H + +namespace llvm { + class Module; +} + +namespace clang { + class ASTContext; + class FunctionDecl; + +namespace CodeGen { + /// BuilderTy - This is an opaque type used to reference ModuleBuilder + /// objects. + typedef void BuilderTy; + + /// Init - Create a ModuleBuilder with the specified ASTContext. + BuilderTy *Init(ASTContext &Context, llvm::Module &M); + + /// CodeGenFunction - Convert the AST node for a FunctionDecl into LLVM. + /// + void CodeGenFunction(BuilderTy *Builder, FunctionDecl *D); + + /// PrintStats - Emit statistic information to stderr. + /// + void PrintStats(BuilderTy *Builder); + + /// Terminate - Gracefully shut down the builder.
+ /// + void Terminate(BuilderTy *Builder); +} // end namespace CodeGen +} // end namespace clang + +#endif diff --git a/include/clang/Lex/DirectoryLookup.h b/include/clang/Lex/DirectoryLookup.h new file mode 100644 index 0000000000..a1cfb0a340 --- /dev/null +++ b/include/clang/Lex/DirectoryLookup.h @@ -0,0 +1,70 @@ +//===--- DirectoryLookup.h - Info for searching for headers -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the DirectoryLookup interface. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LEX_DIRECTORYLOOKUP_H +#define LLVM_CLANG_LEX_DIRECTORYLOOKUP_H + +namespace clang { +class DirectoryEntry; + +/// DirectoryLookup - This class is used to specify the search order for +/// directories in #include directives. +class DirectoryLookup { +public: + enum DirType { + NormalHeaderDir, + SystemHeaderDir, + ExternCSystemHeaderDir + }; +private: + /// Dir - This is the actual directory that we're referring to. + /// + const DirectoryEntry *Dir; + + /// DirCharacteristic - The type of directory this is, one of the DirType enum + /// values. + DirType DirCharacteristic : 2; + + /// UserSupplied - True if this is a user-supplied directory. + /// + bool UserSupplied : 1; + + /// Framework - True if this is a framework directory search-path. + /// + bool Framework : 1; +public: + DirectoryLookup(const DirectoryEntry *dir, DirType DT, bool isUser, + bool isFramework) + : Dir(dir), DirCharacteristic(DT), UserSupplied(isUser), + Framework(isFramework) {} + + /// getDir - Return the directory that this entry refers to. 
+ /// + const DirectoryEntry *getDir() const { return Dir; } + + /// getDirCharacteristic - The type of directory this is, one of the DirType + /// enum values. + DirType getDirCharacteristic() const { return DirCharacteristic; } + + /// isUserSupplied - True if this is a user-supplied directory. + /// + bool isUserSupplied() const { return UserSupplied; } + + /// isFramework - True if this is a framework directory. + /// + bool isFramework() const { return Framework; } +}; + +} // end namespace clang + +#endif diff --git a/include/clang/Lex/HeaderSearch.h b/include/clang/Lex/HeaderSearch.h new file mode 100644 index 0000000000..79b3366ecd --- /dev/null +++ b/include/clang/Lex/HeaderSearch.h @@ -0,0 +1,166 @@ +//===--- HeaderSearch.h - Resolve Header File Locations ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the HeaderSearch interface. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LEX_HEADERSEARCH_H +#define LLVM_CLANG_LEX_HEADERSEARCH_H + +#include "clang/Lex/DirectoryLookup.h" +#include "llvm/ADT/StringMap.h" +#include <vector> + +namespace clang { +class FileEntry; +class FileManager; +class IdentifierInfo; + + +/// HeaderSearch - This class encapsulates the information needed to find the +/// file referenced by a #include or #include_next, (sub-)framework lookup, etc. +class HeaderSearch { + FileManager &FileMgr; + + /// #include search path information. Requests for #include "x" search the + /// directory of the #including file first, then each directory in SearchDirs + /// consecutively. Requests for <x> search the current dir first, then each + /// directory in SearchDirs, starting at SystemDirIdx, consecutively.
If + /// NoCurDirSearch is true, then the check for the file in the current + /// directory is suppressed. + std::vector<DirectoryLookup> SearchDirs; + unsigned SystemDirIdx; + bool NoCurDirSearch; + + /// PerFileInfo - The preprocessor keeps track of this information for each + /// file that is #included. + struct PerFileInfo { + /// isImport - True if this is a #import'd or #pragma once file. + bool isImport : 1; + + /// DirInfo - Keep track of whether this is a system header, and if so, + /// whether it is C++ clean or not. This can be set by the include paths or + /// by #pragma gcc system_header. + DirectoryLookup::DirType DirInfo : 2; + + /// NumIncludes - This is the number of times the file has been included + /// already. + unsigned short NumIncludes; + + /// ControllingMacro - If this file has a #ifndef XXX (or equivalent) guard + /// that protects the entire contents of the file, this is the identifier + /// for the macro that controls whether or not it has any effect. + const IdentifierInfo *ControllingMacro; + + PerFileInfo() : isImport(false), DirInfo(DirectoryLookup::NormalHeaderDir), + NumIncludes(0), ControllingMacro(0) {} + }; + + /// FileInfo - This contains all of the preprocessor-specific data about files + /// that are included. The vector is indexed by the FileEntry's UID. + /// + std::vector<PerFileInfo> FileInfo; + + /// FrameworkMap - This is a collection mapping a framework or subframework + /// name like "Carbon" to the Carbon.framework directory. + llvm::StringMap<const DirectoryEntry *> FrameworkMap; + + // Various statistics we track for performance analysis. + unsigned NumIncluded; + unsigned NumMultiIncludeFileOptzn; + unsigned NumFrameworkLookups, NumSubFrameworkLookups; +public: + HeaderSearch(FileManager &FM); + + FileManager &getFileMgr() const { return FileMgr; } + + /// SetSearchPaths - Interface for setting the file search paths.
+ /// + void SetSearchPaths(const std::vector<DirectoryLookup> &dirs, + unsigned systemDirIdx, bool noCurDirSearch) { + SearchDirs = dirs; + SystemDirIdx = systemDirIdx; + NoCurDirSearch = noCurDirSearch; + } + + /// ClearFileInfo - Forget everything we know about headers so far. + void ClearFileInfo() { + FileInfo.clear(); + } + + /// LookupFile - Given a "foo" or <foo> reference, look up the indicated file, + /// return null on failure. isAngled indicates whether the file reference is + /// a <> reference. If successful, this returns 'UsedDir', the + /// DirectoryLookup member the file was found in, or null if not applicable. + /// If CurDir is non-null, the file was found in the specified directory + /// search location. This is used to implement #include_next. CurFileEnt, if + /// non-null, indicates where the #including file is, in case a relative + /// search is needed. + const FileEntry *LookupFile(const char *FilenameStart, + const char *FilenameEnd, bool isAngled, + const DirectoryLookup *FromDir, + const DirectoryLookup *&CurDir, + const FileEntry *CurFileEnt); + + /// LookupSubframeworkHeader - Look up a subframework for the specified + /// #include file. For example, if #include'ing <HIToolbox/HIToolbox.h> from + /// within ".../Carbon.framework/Headers/Carbon.h", check to see if HIToolbox + /// is a subframework within Carbon.framework. If so, return the FileEntry + /// for the designated file, otherwise return null. + const FileEntry *LookupSubframeworkHeader(const char *FilenameStart, + const char *FilenameEnd, + const FileEntry *RelativeFileEnt); + + /// ShouldEnterIncludeFile - Mark the specified file as a target of a + /// #include, #include_next, or #import directive. Return false if #including + /// the file will have no effect or true if we should include it.
+ bool ShouldEnterIncludeFile(const FileEntry *File, bool isImport); + + + /// getFileDirFlavor - Return whether the specified file is a normal header, + /// a system header, or a C++ friendly system header. + DirectoryLookup::DirType getFileDirFlavor(const FileEntry *File) { + return getFileInfo(File).DirInfo; + } + + /// MarkFileIncludeOnce - Mark the specified file as a "once only" file, e.g. + /// due to #pragma once. + void MarkFileIncludeOnce(const FileEntry *File) { + getFileInfo(File).isImport = true; + } + + /// MarkFileSystemHeader - Mark the specified file as a system header, e.g. + /// due to #pragma GCC system_header. + void MarkFileSystemHeader(const FileEntry *File) { + getFileInfo(File).DirInfo = DirectoryLookup::SystemHeaderDir; + } + + /// SetFileControllingMacro - Mark the specified file as having a controlling + /// macro. This is used by the multiple-include optimization to eliminate + /// no-op #includes. + void SetFileControllingMacro(const FileEntry *File, + const IdentifierInfo *ControllingMacro) { + getFileInfo(File).ControllingMacro = ControllingMacro; + } + + void PrintStats(); +private: + const FileEntry *DoFrameworkLookup(const DirectoryEntry *Dir, + const char *FilenameStart, + const char *FilenameEnd); + + /// getFileInfo - Return the PerFileInfo structure for the specified + /// FileEntry. + PerFileInfo &getFileInfo(const FileEntry *FE); +}; + +} // end namespace clang + +#endif diff --git a/include/clang/Lex/IdentifierTable.h b/include/clang/Lex/IdentifierTable.h new file mode 100644 index 0000000000..4edba60094 --- /dev/null +++ b/include/clang/Lex/IdentifierTable.h @@ -0,0 +1,171 @@ +//===--- IdentifierTable.h - Hash table for identifier lookup ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This file defines the IdentifierInfo and IdentifierTable interfaces. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LEX_IDENTIFIERTABLE_H +#define LLVM_CLANG_LEX_IDENTIFIERTABLE_H + +#include "clang/Basic/TokenKinds.h" +#include "llvm/ADT/StringMap.h" +#include <string> + +namespace clang { + class MacroInfo; + class LangOptions; + +/// IdentifierInfo - One of these records is kept for each identifier that +/// is lexed. This contains information about whether the token was #define'd, +/// is a language keyword, or if it is a front-end token of some sort (e.g. a +/// variable or function name). The preprocessor keeps this information in a +/// set, and all tok::identifier tokens have a pointer to one of these. +class IdentifierInfo { + MacroInfo *Macro; // Set if this identifier is #define'd. + tok::TokenKind TokenID : 8; // Front-end token ID or tok::identifier. + tok::PPKeywordKind PPID : 5; // ID for preprocessor command like #'ifdef'. + tok::ObjCKeywordKind ObjCID : 5; // ID for objc @ keyword like @'protocol'. + unsigned BuiltinID :12; // ID if this is a builtin (__builtin_inf). + bool IsExtension : 1; // True if identifier is a lang extension. + bool IsPoisoned : 1; // True if identifier is poisoned. + bool IsOtherTargetMacro : 1; // True if ident is macro on another target. + bool IsCPPOperatorKeyword : 1; // True if ident is a C++ operator keyword. + bool IsNonPortableBuiltin : 1; // True if builtin varies across targets. + void *FETokenInfo; // Managed by the language front-end. + IdentifierInfo(const IdentifierInfo&); // NONCOPYABLE. +public: + IdentifierInfo(); + ~IdentifierInfo(); + + /// getName - Return the actual string for this identifier. The length of + /// this string is stored in NameLen, and the returned string is properly null + /// terminated. 
+ /// + const char *getName() const { + // String data is stored immediately after the IdentifierInfo object. + return (const char*)(this+1); + } + + /// getMacroInfo - Return macro information about this identifier, or null if + /// it is not a macro. + MacroInfo *getMacroInfo() const { return Macro; } + void setMacroInfo(MacroInfo *I) { Macro = I; } + + /// get/setTokenID - If this is a source-language token (e.g. 'for'), this API + /// can be used to cause the lexer to map identifiers to source-language + /// tokens. + tok::TokenKind getTokenID() const { return TokenID; } + void setTokenID(tok::TokenKind ID) { TokenID = ID; } + + /// getPPKeywordID - Return the preprocessor keyword ID for this identifier. + /// For example, define will return tok::pp_define. + tok::PPKeywordKind getPPKeywordID() const { return PPID; } + void setPPKeywordID(tok::PPKeywordKind ID) { PPID = ID; } + + /// getObjCKeywordID - Return the Objective-C keyword ID for this + /// identifier. For example, 'class' will return tok::objc_class if ObjC is + /// enabled. + tok::ObjCKeywordKind getObjCKeywordID() const { return ObjCID; } + void setObjCKeywordID(tok::ObjCKeywordKind ID) { ObjCID = ID; } + + /// getBuiltinID - Return a value indicating whether this is a builtin + /// function. 0 is not-built-in. 1 is builtin-for-some-nonprimary-target. + /// 2+ are specific builtin functions. + unsigned getBuiltinID() const { return BuiltinID; } + void setBuiltinID(unsigned ID) { + assert(ID < (1 << 12) && "ID too large for field!"); + BuiltinID = ID; + } + + /// isNonPortableBuiltin - Return true if this identifier corresponds to a + /// builtin on some other target, but isn't one on this target, or if it is on + /// the target but not on another, or if it is on both but it differs somehow + /// in behavior. 
+ bool isNonPortableBuiltin() const { return IsNonPortableBuiltin; } + void setNonPortableBuiltin(bool Val) { IsNonPortableBuiltin = Val; } + + /// get/setExtension - Initialize information about whether or not this + /// language token is an extension. This controls extension warnings, and is + /// only valid if a custom token ID is set. + bool isExtensionToken() const { return IsExtension; } + void setIsExtensionToken(bool Val) { IsExtension = Val; } + + /// setIsPoisoned - Mark this identifier as poisoned. After poisoning, the + /// Preprocessor will emit an error every time this token is used. + void setIsPoisoned(bool Value = true) { IsPoisoned = Value; } + + /// isPoisoned - Return true if this token has been poisoned. + bool isPoisoned() const { return IsPoisoned; } + + /// setIsOtherTargetMacro/isOtherTargetMacro control whether this identifier + /// is seen as being a macro on some other target. + void setIsOtherTargetMacro(bool Val = true) { IsOtherTargetMacro = Val; } + bool isOtherTargetMacro() const { return IsOtherTargetMacro; } + + /// isCPlusPlusOperatorKeyword/setIsCPlusplusOperatorKeyword controls whether + /// this identifier is a C++ alternate representation of an operator. + void setIsCPlusplusOperatorKeyword(bool Val = true) + { IsCPPOperatorKeyword = Val; } + bool isCPlusPlusOperatorKeyword() const { return IsCPPOperatorKeyword; } + + /// getFETokenInfo/setFETokenInfo - The language front-end is allowed to + /// associate arbitrary metadata with this token. + template<typename T> + T *getFETokenInfo() const { return static_cast<T*>(FETokenInfo); } + void setFETokenInfo(void *T) { FETokenInfo = T; } +}; + + + +/// IdentifierTable - This table implements an efficient mapping from strings to +/// IdentifierInfo nodes. It has no other purpose, but this is an +/// extremely performance-critical piece of the code, as each occurrence of +/// every identifier goes through here when lexed. 
+class IdentifierTable { + // Shark shows that using MallocAllocator is *much* slower than using this + // BumpPtrAllocator! + typedef llvm::StringMap<IdentifierInfo, llvm::BumpPtrAllocator> HashTableTy; + HashTableTy HashTable; +public: + /// IdentifierTable ctor - Create the identifier table, populating it with + /// info about the language keywords for the language specified by LangOpts. + IdentifierTable(const LangOptions &LangOpts); + + /// get - Return the identifier token info for the specified named identifier. + /// + IdentifierInfo &get(const char *NameStart, const char *NameEnd) { + return HashTable.GetOrCreateValue(NameStart, NameEnd).getValue(); + } + + IdentifierInfo &get(const char *Name) { + return get(Name, Name+strlen(Name)); + } + IdentifierInfo &get(const std::string &Name) { + // Don't use c_str() here: no need to be null terminated. + const char *NameBytes = &Name[0]; + return get(NameBytes, NameBytes+Name.size()); + } + + typedef HashTableTy::const_iterator iterator; + typedef HashTableTy::const_iterator const_iterator; + + iterator begin() const { return HashTable.begin(); } + iterator end() const { return HashTable.end(); } + + /// PrintStats - Print some statistics to stderr that indicate how well the + /// hashing is doing. + void PrintStats() const; +private: + void AddKeywords(const LangOptions &LangOpts); +}; + +} // end namespace clang + +#endif diff --git a/include/clang/Lex/Lexer.h b/include/clang/Lex/Lexer.h new file mode 100644 index 0000000000..db0c7183d2 --- /dev/null +++ b/include/clang/Lex/Lexer.h @@ -0,0 +1,353 @@ +//===--- Lexer.h - C Language Family Lexer ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Lexer interface. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LEXER_H +#define LLVM_CLANG_LEXER_H + +#include "clang/Lex/LexerToken.h" +#include "clang/Lex/MultipleIncludeOpt.h" +#include "clang/Basic/LangOptions.h" +#include <string> +#include <vector> + +namespace llvm { + class MemoryBuffer; +} + +namespace clang { +class Diagnostic; +class Preprocessor; + +/// Lexer - This provides a simple interface that turns a text buffer into a +/// stream of tokens. This provides no support for file reading or buffering, +/// or buffering/seeking of tokens, only forward lexing is supported. It relies +/// on the specified Preprocessor object to handle preprocessor directives, etc. +class Lexer { + //===--------------------------------------------------------------------===// + // Constant configuration values for this lexer. + const char * const BufferEnd; // End of the buffer. + const llvm::MemoryBuffer *InputFile; // The file we are reading from. + unsigned CurFileID; // FileID for the current input file. + Preprocessor &PP; // Preprocessor object controlling lexing. + LangOptions Features; // Features enabled by this language (cache). + bool Is_PragmaLexer; // True if lexer for _Pragma handling. + bool IsMainFile; // True if top-level file. + + //===--------------------------------------------------------------------===// + // Context-specific lexing flags set by the preprocessor. + // + + /// ParsingPreprocessorDirective - This is true when parsing #XXX. This turns + /// '\n' into a tok::eom token. + bool ParsingPreprocessorDirective; + + /// ParsingFilename - True after #include: this turns <xx> into a + /// tok::angle_string_literal token. + bool ParsingFilename; + + /// LexingRawMode - True if in raw mode: This flag disables interpretation of + /// tokens and is a far faster mode to lex in than non-raw-mode. This flag: + /// 1. If EOF of the current lexer is found, the include stack isn't popped. + /// 2. 
Identifier information is not looked up for identifier tokens. As an + /// effect of this, implicit macro expansion is naturally disabled. + /// 3. "#" tokens at the start of a line are treated as normal tokens, not + /// implicitly transformed by the lexer. + /// 4. All diagnostic messages are disabled, except for unterminated /*. + /// 5. The only callback made into the preprocessor is to report a hard error + /// on an unterminated '/*' comment. + bool LexingRawMode; + + /// KeepCommentMode - The lexer can optionally keep C & BCPL-style comments, + /// and return them as tokens. This is used for -C and -CC modes. + bool KeepCommentMode; + + //===--------------------------------------------------------------------===// + // Context that changes as the file is lexed. + // NOTE: any state that mutates when in raw mode must have save/restore code + // in Lexer::isNextPPTokenLParen. + + // BufferPtr - Current pointer into the buffer. This is the next character + // to be lexed. + const char *BufferPtr; + + // IsAtStartOfLine - True if the next lexed token should get the "start of + // line" flag set on it. + bool IsAtStartOfLine; + + /// MIOpt - This is a state machine that detects the #ifndef-wrapping a file + /// idiom for the multiple-include optimization. + MultipleIncludeOpt MIOpt; + + /// ConditionalStack - Information about the set of #if/#ifdef/#ifndef blocks + /// we are currently in. + std::vector<PPConditionalInfo> ConditionalStack; + + friend class Preprocessor; +public: + + /// Lexer constructor - Create a new lexer object for the specified buffer + /// with the specified preprocessor managing the lexing process. This lexer + /// assumes that the specified MemoryBuffer and Preprocessor objects will + /// outlive it, but doesn't take ownership of either pointer. 
+ Lexer(const llvm::MemoryBuffer *InBuffer, unsigned CurFileID, + Preprocessor &PP, const char *BufStart = 0, const char *BufEnd = 0); + + /// getFeatures - Return the language features currently enabled. NOTE: this + /// lexer modifies features as a file is parsed! + const LangOptions &getFeatures() const { return Features; } + + /// getCurFileID - Return the FileID for the file we are lexing out of. This + /// implicitly encodes the include path to get to the file. + unsigned getCurFileID() const { return CurFileID; } + + /// setIsMainFile - Mark this lexer as being the lexer for the top-level + /// source file. + void setIsMainFile() { + IsMainFile = true; + } + + /// isMainFile - Return true if this is the top-level file. + /// + bool isMainFile() const { return IsMainFile; } + + /// Lex - Return the next token in the file. If this is the end of file, it + /// returns the tok::eof token. The token is returned through Result; this + /// function itself returns nothing. This implicitly involves + /// the preprocessor. + void Lex(LexerToken &Result) { + // Start a new token. + Result.startToken(); + + // NOTE, any changes here should also change code after calls to + // Preprocessor::HandleDirective + if (IsAtStartOfLine) { + Result.setFlag(LexerToken::StartOfLine); + IsAtStartOfLine = false; + } + + // Get a token. Note that this may delete the current lexer if the end of + // file is reached. + LexTokenInternal(Result); + } + + /// LexRawToken - Switch the lexer to raw mode, lex a token into Result and + /// switch it back. Return true if the 'next character to read' pointer + /// points at the end of the lexer buffer, false otherwise. + bool LexRawToken(LexerToken &Result) { + assert(!LexingRawMode && "Already in raw mode!"); + LexingRawMode = true; + Lex(Result); + LexingRawMode = false; + return BufferPtr == BufferEnd; + } + + /// ReadToEndOfLine - Read the rest of the current preprocessor line as an + /// uninterpreted string. 
This switches the lexer out of directive mode. + std::string ReadToEndOfLine(); + + + /// Diag - Forwarding function for diagnostics. This translate a source + /// position in the current buffer into a SourceLocation object for rendering. + void Diag(const char *Loc, unsigned DiagID, + const std::string &Msg = std::string()) const; + void Diag(SourceLocation Loc, unsigned DiagID, + const std::string &Msg = std::string()) const; + + /// getSourceLocation - Return a source location identifier for the specified + /// offset in the current file. + SourceLocation getSourceLocation(const char *Loc) const; + + /// Stringify - Convert the specified string into a C string by escaping '\' + /// and " characters. This does not add surrounding ""'s to the string. + /// If Charify is true, this escapes the ' character instead of ". + static std::string Stringify(const std::string &Str, bool Charify = false); + + //===--------------------------------------------------------------------===// + // Internal implementation interfaces. +private: + + /// LexTokenInternal - Internal interface to lex a preprocessing token. Called + /// by Lex. + /// + void LexTokenInternal(LexerToken &Result); + + /// FormTokenWithChars - When we lex a token, we have identified a span + /// starting at BufferPtr, going to TokEnd that forms the token. This method + /// takes that range and assigns it to the token as its location and size. In + /// addition, since tokens cannot overlap, this also updates BufferPtr to be + /// TokEnd. + void FormTokenWithChars(LexerToken &Result, const char *TokEnd) { + Result.setLocation(getSourceLocation(BufferPtr)); + Result.setLength(TokEnd-BufferPtr); + BufferPtr = TokEnd; + } + + /// isNextPPTokenLParen - Return 1 if the next unexpanded token will return a + /// tok::l_paren token, 0 if it is something else and 2 if there are no more + /// tokens in the buffer controlled by this lexer. 
+ unsigned isNextPPTokenLParen(); + + //===--------------------------------------------------------------------===// + // Lexer character reading interfaces. + + // This lexer is built on two interfaces for reading characters, both of which + // automatically provide phase 1/2 translation. getAndAdvanceChar is used + // when we know that we will be reading a character from the input buffer and + // that this character will be part of the result token. This occurs in (f.e.) + // string processing, because we know we need to read until we find the + // closing '"' character. + // + // The second interface is the combination of PeekCharAndSize with + // ConsumeChar. PeekCharAndSize reads a phase 1/2 translated character, + // returning it and its size. If the lexer decides that this character is + // part of the current token, it calls ConsumeChar on it. This two stage + // approach allows us to emit diagnostics for characters (e.g. warnings about + // trigraphs), knowing that they only are emitted if the character is + // consumed. + + + /// getAndAdvanceChar - Read a single 'character' from the specified buffer, + /// advance over it, and return it. This is tricky in several cases. Here we + /// just handle the trivial case and fall-back to the non-inlined + /// getCharAndSizeSlow method to handle the hard case. + inline char getAndAdvanceChar(const char *&Ptr, LexerToken &Tok) { + // If this is not a trigraph and not a UCN or escaped newline, return + // quickly. + if (Ptr[0] != '?' && Ptr[0] != '\\') return *Ptr++; + + unsigned Size = 0; + char C = getCharAndSizeSlow(Ptr, Size, &Tok); + Ptr += Size; + return C; + } + + /// ConsumeChar - When a character (identified by PeekCharAndSize) is consumed + /// and added to a given token, check to see if there are diagnostics that + /// need to be emitted or flags that need to be set on the token. If so, do + /// it. 
+ const char *ConsumeChar(const char *Ptr, unsigned Size, LexerToken &Tok) { + // Normal case, we consumed exactly one token. Just return it. + if (Size == 1) + return Ptr+Size; + + // Otherwise, re-lex the character with a current token, allowing + // diagnostics to be emitted and flags to be set. + Size = 0; + getCharAndSizeSlow(Ptr, Size, &Tok); + return Ptr+Size; + } + + /// getCharAndSize - Peek a single 'character' from the specified buffer, + /// get its size, and return it. This is tricky in several cases. Here we + /// just handle the trivial case and fall-back to the non-inlined + /// getCharAndSizeSlow method to handle the hard case. + inline char getCharAndSize(const char *Ptr, unsigned &Size) { + // If this is not a trigraph and not a UCN or escaped newline, return + // quickly. + if (Ptr[0] != '?' && Ptr[0] != '\\') { + Size = 1; + return *Ptr; + } + + Size = 0; + return getCharAndSizeSlow(Ptr, Size); + } + + /// getCharAndSizeSlow - Handle the slow/uncommon case of the getCharAndSize + /// method. + char getCharAndSizeSlow(const char *Ptr, unsigned &Size, LexerToken *Tok = 0); + + /// getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever + /// emit a warning. + static inline char getCharAndSizeNoWarn(const char *Ptr, unsigned &Size, + const LangOptions &Features) { + // If this is not a trigraph and not a UCN or escaped newline, return + // quickly. + if (Ptr[0] != '?' && Ptr[0] != '\\') { + Size = 1; + return *Ptr; + } + + Size = 0; + return getCharAndSizeSlowNoWarn(Ptr, Size, Features); + } + + /// getCharAndSizeSlowNoWarn - Same as getCharAndSizeSlow, but never emits a + /// diagnostic. + static char getCharAndSizeSlowNoWarn(const char *Ptr, unsigned &Size, + const LangOptions &Features); + + //===--------------------------------------------------------------------===// + // #if directive handling. 
+ + /// pushConditionalLevel - When we enter a #if directive, this keeps track of + /// what we are currently in for diagnostic emission (e.g. #if with missing + /// #endif). + void pushConditionalLevel(SourceLocation DirectiveStart, bool WasSkipping, + bool FoundNonSkip, bool FoundElse) { + PPConditionalInfo CI; + CI.IfLoc = DirectiveStart; + CI.WasSkipping = WasSkipping; + CI.FoundNonSkip = FoundNonSkip; + CI.FoundElse = FoundElse; + ConditionalStack.push_back(CI); + } + void pushConditionalLevel(const PPConditionalInfo &CI) { + ConditionalStack.push_back(CI); + } + + /// popConditionalLevel - Remove an entry off the top of the conditional + /// stack, returning information about it. If the conditional stack is empty, + /// this returns true and does not fill in the arguments. + bool popConditionalLevel(PPConditionalInfo &CI) { + if (ConditionalStack.empty()) return true; + CI = ConditionalStack.back(); + ConditionalStack.pop_back(); + return false; + } + + /// peekConditionalLevel - Return the top of the conditional stack. This + /// requires that there be a conditional active. + PPConditionalInfo &peekConditionalLevel() { + assert(!ConditionalStack.empty() && "No conditionals active!"); + return ConditionalStack.back(); + } + + unsigned getConditionalStackDepth() const { return ConditionalStack.size(); } + + //===--------------------------------------------------------------------===// + // Other lexer functions. + + // Helper functions to lex the remainder of a token of the specific type. 
+ void LexIdentifier (LexerToken &Result, const char *CurPtr); + void LexNumericConstant (LexerToken &Result, const char *CurPtr); + void LexStringLiteral (LexerToken &Result, const char *CurPtr,bool Wide); + void LexAngledStringLiteral(LexerToken &Result, const char *CurPtr); + void LexCharConstant (LexerToken &Result, const char *CurPtr); + bool LexEndOfFile (LexerToken &Result, const char *CurPtr); + + void SkipWhitespace (LexerToken &Result, const char *CurPtr); + bool SkipBCPLComment (LexerToken &Result, const char *CurPtr); + bool SkipBlockComment (LexerToken &Result, const char *CurPtr); + bool SaveBCPLComment (LexerToken &Result, const char *CurPtr); + + /// LexIncludeFilename - After the preprocessor has parsed a #include, lex and + /// (potentially) macro expand the filename. If the sequence parsed is not + /// lexically legal, emit a diagnostic and return a result EOM token. + void LexIncludeFilename(LexerToken &Result); +}; + + +} // end namespace clang + +#endif diff --git a/include/clang/Lex/LexerToken.h b/include/clang/Lex/LexerToken.h new file mode 100644 index 0000000000..119a1d92ad --- /dev/null +++ b/include/clang/Lex/LexerToken.h @@ -0,0 +1,137 @@ +//===--- LexerToken.h - Token interface -------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the LexerToken interface. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LEXERTOKEN_H +#define LLVM_CLANG_LEXERTOKEN_H + +#include "clang/Basic/TokenKinds.h" +#include "clang/Basic/SourceLocation.h" + +namespace clang { + +class IdentifierInfo; + +/// LexerToken - This structure provides full information about a lexed token. 
+/// It is not intended to be space efficient, it is intended to return as much +/// information as possible about each returned token. This is expected to be +/// compressed into a smaller form if memory footprint is important. +class LexerToken { + /// The location and length of the token text itself. + SourceLocation Loc; + unsigned Length; + + /// IdentifierInfo - If this was an identifier, this points to the uniqued + /// information about this identifier. + IdentifierInfo *IdentInfo; + + /// Kind - The actual flavor of token this is. + /// + tok::TokenKind Kind : 8; + + /// Flags - Bits we track about this token, members of the TokenFlags enum. + unsigned Flags : 8; +public: + + // Various flags set per token: + enum TokenFlags { + StartOfLine = 0x01, // At start of line or only after whitespace. + LeadingSpace = 0x02, // Whitespace exists before this token. + DisableExpand = 0x04, // This identifier may never be macro expanded. + NeedsCleaning = 0x08 // Contained an escaped newline or trigraph. + }; + + tok::TokenKind getKind() const { return Kind; } + void setKind(tok::TokenKind K) { Kind = K; } + + /// getLocation - Return a source location identifier for the specified + /// offset in the current file. + SourceLocation getLocation() const { return Loc; } + unsigned getLength() const { return Length; } + + void setLocation(SourceLocation L) { Loc = L; } + void setLength(unsigned Len) { Length = Len; } + + const char *getName() const { return getTokenName(Kind); } + + /// startToken - Reset all flags to cleared. + /// + void startToken() { + Flags = 0; + IdentInfo = 0; + Loc = SourceLocation(); + } + + IdentifierInfo *getIdentifierInfo() const { return IdentInfo; } + void setIdentifierInfo(IdentifierInfo *II) { + IdentInfo = II; + } + + /// setFlag - Set the specified flag. + void setFlag(TokenFlags Flag) { + Flags |= Flag; + } + + /// clearFlag - Unset the specified flag. 
+ void clearFlag(TokenFlags Flag) { + Flags &= ~Flag; + } + + /// setFlagValue - Set a flag to either true or false. + void setFlagValue(TokenFlags Flag, bool Val) { + if (Val) + setFlag(Flag); + else + clearFlag(Flag); + } + + /// isAtStartOfLine - Return true if this token is at the start of a line. + /// + bool isAtStartOfLine() const { return Flags & StartOfLine; } + + /// hasLeadingSpace - Return true if this token has whitespace before it. + /// + bool hasLeadingSpace() const { return Flags & LeadingSpace; } + + /// isExpandDisabled - Return true if this identifier token should never + /// be expanded in the future, due to C99 6.10.3.4p2. + bool isExpandDisabled() const { return Flags & DisableExpand; } + + /// needsCleaning - Return true if this token has trigraphs or escaped + /// newlines in it. + /// + bool needsCleaning() const { return Flags & NeedsCleaning; } +}; + +/// PPConditionalInfo - Information about the conditional stack (#if directives) +/// currently active. +struct PPConditionalInfo { + /// IfLoc - Location where the conditional started. + /// + SourceLocation IfLoc; + + /// WasSkipping - True if this was contained in a skipping directive, e.g. + /// in a "#if 0" block. + bool WasSkipping; + + /// FoundNonSkip - True if we have emitted tokens already, and now we're in + /// an #else block or something. Only useful in Skipping blocks. + bool FoundNonSkip; + + /// FoundElse - True if we've seen a #else in this block. If so, + /// #elif/#else directives are not allowed. 
+ bool FoundElse; +}; + +} // end namespace clang + +#endif diff --git a/include/clang/Lex/LiteralSupport.h b/include/clang/Lex/LiteralSupport.h new file mode 100644 index 0000000000..adeac18af8 --- /dev/null +++ b/include/clang/Lex/LiteralSupport.h @@ -0,0 +1,156 @@ +//===--- LiteralSupport.h ---------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Steve Naroff and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the NumericLiteralParser, CharLiteralParser, and +// StringLiteralParser interfaces. +// +//===----------------------------------------------------------------------===// + +#ifndef CLANG_LITERALSUPPORT_H +#define CLANG_LITERALSUPPORT_H + +#include <string> +#include "llvm/ADT/SmallString.h" + +namespace llvm { + class APInt; +} + +namespace clang { + +class Diagnostic; +class Preprocessor; +class LexerToken; +class SourceLocation; +class TargetInfo; + +/// NumericLiteralParser - This performs strict semantic analysis of the content +/// of a ppnumber, classifying it as either integer, floating, or erroneous, +/// determines the radix of the value and can convert it to a useful value. +class NumericLiteralParser { + Preprocessor &PP; // needed for diagnostics + + const char *const ThisTokBegin; + const char *const ThisTokEnd; + const char *DigitsBegin, *SuffixBegin; // markers + const char *s; // cursor + + unsigned radix; + + bool saw_exponent, saw_period; + bool saw_float_suffix; + +public: + NumericLiteralParser(const char *begin, const char *end, + SourceLocation Loc, Preprocessor &PP); + bool hadError; + bool isUnsigned; + bool isLong; // This is also set for long long. + bool isLongLong; + + bool isIntegerLiteral() const { + return !saw_period && !saw_exponent ? 
true : false; + } + bool isFloatingLiteral() const { + return saw_period || saw_exponent ? true : false; + } + bool hasSuffix() const { + return SuffixBegin != ThisTokEnd; + } + + unsigned getRadix() const { return radix; } + + /// GetIntegerValue - Convert this numeric literal value to an APInt that + /// matches Val's input width. If there is an overflow (i.e., if the unsigned + /// value read is larger than the APInt's bits will hold), set Val to the low + /// bits of the result and return true. Otherwise, return false. + bool GetIntegerValue(llvm::APInt &Val); + + /// GetFloatValue - Convert this numeric literal to a float. + /// FIXME: the return value is fixed size - make more general. + float GetFloatValue(); + +private: + void Diag(SourceLocation Loc, unsigned DiagID, + const std::string &M = std::string()); + + /// SkipHexDigits - Read and skip over any hex digits, up to End. + /// Return a pointer to the first non-hex digit or End. + const char *SkipHexDigits(const char *ptr) { + while (ptr != ThisTokEnd && isxdigit(*ptr)) + ptr++; + return ptr; + } + + /// SkipOctalDigits - Read and skip over any octal digits, up to End. + /// Return a pointer to the first non-octal digit or End. + const char *SkipOctalDigits(const char *ptr) { + while (ptr != ThisTokEnd && ((*ptr >= '0') && (*ptr <= '7'))) + ptr++; + return ptr; + } + + /// SkipDigits - Read and skip over any digits, up to End. + /// Return a pointer to the first non-decimal digit or End. + const char *SkipDigits(const char *ptr) { + while (ptr != ThisTokEnd && isdigit(*ptr)) + ptr++; + return ptr; + } + + /// SkipBinaryDigits - Read and skip over any binary digits, up to End. + /// Return a pointer to the first non-binary digit or End. + const char *SkipBinaryDigits(const char *ptr) { + while (ptr != ThisTokEnd && (*ptr == '0' || *ptr == '1')) + ptr++; + return ptr; + } + +}; + +/// CharLiteralParser - Perform interpretation and semantic analysis of a +/// character literal. 
+class CharLiteralParser { + unsigned Value; + bool IsWide; + bool HadError; +public: + CharLiteralParser(const char *begin, const char *end, + SourceLocation Loc, Preprocessor &PP); + + bool hadError() const { return HadError; } + bool isWide() const { return IsWide; } + unsigned getValue() const { return Value; } +}; + +/// StringLiteralParser - This decodes string escape characters and performs +/// wide string analysis and Translation Phase #6 (concatenation of string +/// literals) (C99 5.1.1.2p1). +class StringLiteralParser { + Preprocessor &PP; + TargetInfo &Target; + + unsigned MaxTokenLength; + unsigned SizeBound; + unsigned wchar_tByteWidth; + llvm::SmallString<512> ResultBuf; + char *ResultPtr; // cursor +public: + StringLiteralParser(const LexerToken *StringToks, unsigned NumStringToks, + Preprocessor &PP, TargetInfo &T); + bool hadError; + bool AnyWide; + + const char *GetString() { return &ResultBuf[0]; } + unsigned GetStringLength() { return ResultPtr-&ResultBuf[0]; } +}; + +} // end namespace clang + +#endif
\ No newline at end of file diff --git a/include/clang/Lex/MacroExpander.h b/include/clang/Lex/MacroExpander.h new file mode 100644 index 0000000000..8bb4ebfd5d --- /dev/null +++ b/include/clang/Lex/MacroExpander.h @@ -0,0 +1,179 @@ +//===--- MacroExpander.h - Lex from a macro expansion -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the MacroExpander and MacroArgs interfaces. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_MACROEXPANDER_H +#define LLVM_CLANG_MACROEXPANDER_H + +#include "clang/Basic/SourceLocation.h" +#include <vector> + +namespace clang { + class MacroInfo; + class Preprocessor; + class LexerToken; + +/// MacroArgs - An instance of this class captures information about +/// the formal arguments specified to a function-like macro invocation. +class MacroArgs { + /// NumUnexpArgTokens - The number of raw, unexpanded tokens for the + /// arguments. All of the actual argument tokens are allocated immediately + /// after the MacroArgs object in memory. This is all of the arguments + /// concatenated together, with 'EOF' markers at the end of each argument. + unsigned NumUnexpArgTokens; + + /// PreExpArgTokens - Pre-expanded tokens for arguments that need them. Empty + /// if not yet computed. This includes the EOF marker at the end of the + /// stream. + std::vector<std::vector<LexerToken> > PreExpArgTokens; + + /// StringifiedArgs - This contains arguments in 'stringified' form. If the + /// stringified form of an argument has not yet been computed, this is empty. 
+ std::vector<LexerToken> StringifiedArgs; + + /// VarargsElided - True if this is a C99 style varargs macro invocation and + /// there was no argument specified for the "..." argument. If the argument + /// was specified (even empty) or this isn't a C99 style varargs function, or + /// if in strict mode and the C99 varargs macro had only a ... argument, this + /// is false. + bool VarargsElided; + + MacroArgs(unsigned NumToks, bool varargsElided) + : NumUnexpArgTokens(NumToks), VarargsElided(varargsElided) {} + ~MacroArgs() {} +public: + /// MacroArgs ctor function - Create a new MacroArgs object with the specified + /// macro and argument info. + static MacroArgs *create(const MacroInfo *MI, + const LexerToken *UnexpArgTokens, + unsigned NumArgTokens, bool VarargsElided); + + /// destroy - Destroy and deallocate the memory for this object. + /// + void destroy(); + + /// ArgNeedsPreexpansion - If we can prove that the argument won't be affected + /// by pre-expansion, return false. Otherwise, conservatively return true. + bool ArgNeedsPreexpansion(const LexerToken *ArgTok) const; + + /// getUnexpArgument - Return a pointer to the first token of the unexpanded + /// token list for the specified formal. + /// + const LexerToken *getUnexpArgument(unsigned Arg) const; + + /// getArgLength - Given a pointer to an expanded or unexpanded argument, + /// return the number of tokens, not counting the EOF, that make up the + /// argument. + static unsigned getArgLength(const LexerToken *ArgPtr); + + /// getPreExpArgument - Return the pre-expanded form of the specified + /// argument. + const std::vector<LexerToken> & + getPreExpArgument(unsigned Arg, Preprocessor &PP); + + /// getStringifiedArgument - Compute, cache, and return the specified argument + /// that has been 'stringified' as required by the # operator. 
+ const LexerToken &getStringifiedArgument(unsigned ArgNo, Preprocessor &PP); + + /// getNumArguments - Return the number of arguments passed into this macro + /// invocation. + unsigned getNumArguments() const { return NumUnexpArgTokens; } + + + /// isVarargsElidedUse - Return true if this is a C99 style varargs macro + /// invocation and there was no argument specified for the "..." argument. If + /// the argument was specified (even empty) or this isn't a C99 style varargs + /// function, or if in strict mode and the C99 varargs macro had only a ... + /// argument, this returns false. + bool isVarargsElidedUse() const { return VarargsElided; } +}; + + +/// MacroExpander - This implements a lexer that returns token from a macro body +/// or token stream instead of lexing from a character buffer. +/// +class MacroExpander { + /// Macro - The macro we are expanding from. This is null if expanding a + /// token stream. + /// + MacroInfo *Macro; + + /// ActualArgs - The actual arguments specified for a function-like macro, or + /// null. The MacroExpander owns the pointed-to object. + MacroArgs *ActualArgs; + + /// PP - The current preprocessor object we are expanding for. + /// + Preprocessor &PP; + + /// MacroTokens - This is the pointer to an array of tokens that the macro is + /// defined to, with arguments expanded for function-like macros. If this is + /// a token stream, these are the tokens we are returning. + const LexerToken *MacroTokens; + + /// NumMacroTokens - This is the length of the MacroTokens array. + /// + unsigned NumMacroTokens; + + /// CurToken - This is the next token that Lex will return. + /// + unsigned CurToken; + + /// InstantiateLoc - The source location where this macro was instantiated. + /// + SourceLocation InstantiateLoc; + + /// Lexical information about the expansion point of the macro: the identifier + /// that the macro expanded from had these properties. 
+ bool AtStartOfLine, HasLeadingSpace; + + MacroExpander(const MacroExpander&); // DO NOT IMPLEMENT + void operator=(const MacroExpander&); // DO NOT IMPLEMENT +public: + /// Create a macro expander for the specified macro with the specified actual + /// arguments. Note that this ctor takes ownership of the ActualArgs pointer. + MacroExpander(LexerToken &Tok, MacroArgs *ActualArgs, Preprocessor &PP); + + /// Create a macro expander for the specified token stream. This does not + /// take ownership of the specified token vector. + MacroExpander(const LexerToken *TokArray, unsigned NumToks, Preprocessor &PP); + ~MacroExpander(); + + /// isNextTokenLParen - If the next token lexed will pop this macro off the + /// expansion stack, return 2. If the next unexpanded token is a '(', return + /// 1, otherwise return 0. + unsigned isNextTokenLParen() const; + + /// Lex - Lex and return a token from this macro stream. + void Lex(LexerToken &Tok); + +private: + /// isAtEnd - Return true if the next lex call will pop this macro off the + /// include stack. + bool isAtEnd() const { + return CurToken == NumMacroTokens; + } + + /// PasteTokens - Tok is the LHS of a ## operator, and CurToken is the ## + /// operator. Read the ## and RHS, and paste the LHS/RHS together. If there + /// is another ## after it, chomp it iteratively. Return the result as + /// Tok. + void PasteTokens(LexerToken &Tok); + + /// Expand the arguments of a function-like macro so that we can quickly + /// return preexpanded tokens from MacroTokens. 
+ void ExpandFunctionArguments(); +}; + +} // end namespace clang + +#endif diff --git a/include/clang/Lex/MacroInfo.h b/include/clang/Lex/MacroInfo.h new file mode 100644 index 0000000000..ad489de744 --- /dev/null +++ b/include/clang/Lex/MacroInfo.h @@ -0,0 +1,186 @@ +//===--- MacroInfo.h - Information about #defined identifiers ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the MacroInfo interface. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_MACROINFO_H +#define LLVM_CLANG_MACROINFO_H + +#include "clang/Lex/LexerToken.h" +#include <vector> + +namespace clang { + class Preprocessor; + +/// MacroInfo - Each identifier that is #define'd has an instance of this class +/// associated with it, used to implement macro expansion. +class MacroInfo { + //===--------------------------------------------------------------------===// + // State set when the macro is defined. + + /// Location - This is the place the macro is defined. + SourceLocation Location; + + /// Arguments - The list of arguments for a function-like macro. This can be + /// empty, for, e.g. "#define X()". In a C99-style variadic macro, this + /// includes the __VA_ARGS__ identifier on the list. + std::vector<IdentifierInfo*> Arguments; + + /// ReplacementTokens - This is the list of tokens that the macro is defined + /// to. + std::vector<LexerToken> ReplacementTokens; + + /// IsFunctionLike - True if this macro is a function-like macro, false if it + /// is an object-like macro. + bool IsFunctionLike : 1; + + /// IsC99Varargs - True if this macro is of the form "#define X(...)" or + /// "#define X(Y,Z,...)". 
The __VA_ARGS__ token should be replaced with the + /// contents of "..." in an invocation. + bool IsC99Varargs : 1; + + /// IsGNUVarargs - True if this macro is of the form "#define X(a...)". The + /// "a" identifier in the replacement list will be replaced with all arguments + /// of the macro starting with the specified one. + bool IsGNUVarargs : 1; + + /// IsBuiltinMacro - True if this is a builtin macro, such as __LINE__, and if + /// it has not yet been redefined or undefined. + bool IsBuiltinMacro : 1; + + /// IsTargetSpecific - True if this is a target-specific macro defined with + /// #define_target. + bool IsTargetSpecific : 1; +private: + //===--------------------------------------------------------------------===// + // State that changes as the macro is used. + + /// IsDisabled - True if we have started an expansion of this macro already. + /// This disables recursive expansion, which would be quite bad for things like + /// #define A A. + bool IsDisabled : 1; + + /// IsUsed - True if this macro is either defined in the main file and has + /// been used, or if it is not defined in the main file. This is used to + /// emit -Wunused-macros diagnostics. + bool IsUsed : 1; +public: + MacroInfo(SourceLocation DefLoc); + + /// getDefinitionLoc - Return the location that the macro was defined at. + /// + SourceLocation getDefinitionLoc() const { return Location; } + + /// isIdenticalTo - Return true if the specified macro definition is equal to + /// this macro in spelling, arguments, and whitespace. This is used to emit + /// duplicate definition warnings. This implements the rules in C99 6.10.3. + bool isIdenticalTo(const MacroInfo &Other, Preprocessor &PP) const; + + /// setIsBuiltinMacro - Set or clear the isBuiltinMacro flag. + /// + void setIsBuiltinMacro(bool Val = true) { + IsBuiltinMacro = Val; + } + + /// setIsTargetSpecific - Set or clear the IsTargetSpecific flag. 
+ /// + void setIsTargetSpecific(bool Val = true) { + IsTargetSpecific = Val; + } + bool isTargetSpecific() const { return IsTargetSpecific; } + + /// setIsUsed - Set the value of the IsUsed flag. + /// + void setIsUsed(bool Val) { + IsUsed = Val; + } + + /// addArgument - Add an argument to the list of formal arguments for this + /// function-like macro. + void addArgument(IdentifierInfo *Arg) { + Arguments.push_back(Arg); + } + + /// getArgumentNum - Return the argument number of the specified identifier, + /// or -1 if the identifier is not a formal argument identifier. + int getArgumentNum(IdentifierInfo *Arg) { + for (unsigned i = 0, e = Arguments.size(); i != e; ++i) + if (Arguments[i] == Arg) return i; + return -1; + } + + /// Arguments - The list of arguments for a function-like macro. This can be + /// empty, for, e.g. "#define X()". + typedef std::vector<IdentifierInfo*>::const_iterator arg_iterator; + arg_iterator arg_begin() const { return Arguments.begin(); } + arg_iterator arg_end() const { return Arguments.end(); } + unsigned getNumArgs() const { return Arguments.size(); } + + /// Function/Object-likeness. Keep track of whether this macro has formal + /// parameters. + void setIsFunctionLike() { IsFunctionLike = true; } + bool isFunctionLike() const { return IsFunctionLike; } + bool isObjectLike() const { return !IsFunctionLike; } + + /// Varargs querying methods. This can only be set for function-like macros. + void setIsC99Varargs() { IsC99Varargs = true; } + void setIsGNUVarargs() { IsGNUVarargs = true; } + bool isC99Varargs() const { return IsC99Varargs; } + bool isGNUVarargs() const { return IsGNUVarargs; } + bool isVariadic() const { return IsC99Varargs | IsGNUVarargs; } + + /// isBuiltinMacro - Return true if this macro is a builtin macro, such as + /// __LINE__, which requires processing before expansion. 
+ bool isBuiltinMacro() const { return IsBuiltinMacro; } + + /// isUsed - Return false if this macro is defined in the main file and has + /// not yet been used. + bool isUsed() const { return IsUsed; } + + /// getNumTokens - Return the number of tokens that this macro expands to. + /// + unsigned getNumTokens() const { + return ReplacementTokens.size(); + } + + const LexerToken &getReplacementToken(unsigned Tok) const { + assert(Tok < ReplacementTokens.size() && "Invalid token #"); + return ReplacementTokens[Tok]; + } + + const std::vector<LexerToken> &getReplacementTokens() const { + return ReplacementTokens; + } + + /// AddTokenToBody - Add the specified token to the replacement text for the + /// macro. + void AddTokenToBody(const LexerToken &Tok) { + ReplacementTokens.push_back(Tok); + } + + /// isEnabled - Return true if this macro is enabled: in other words, that we + /// are not currently in an expansion of this macro. + bool isEnabled() const { return !IsDisabled; } + + void EnableMacro() { + assert(IsDisabled && "Cannot enable an already-enabled macro!"); + IsDisabled = false; + } + + void DisableMacro() { + assert(!IsDisabled && "Cannot disable an already-disabled macro!"); + IsDisabled = true; + } +}; + +} // end namespace clang + +#endif diff --git a/include/clang/Lex/MultipleIncludeOpt.h b/include/clang/Lex/MultipleIncludeOpt.h new file mode 100644 index 0000000000..e4ec5b8c35 --- /dev/null +++ b/include/clang/Lex/MultipleIncludeOpt.h @@ -0,0 +1,107 @@ +//===--- MultipleIncludeOpt.h - Header Multiple-Include Optzn ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the MultipleIncludeOpt interface. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_MULTIPLEINCLUDEOPT_H +#define LLVM_CLANG_MULTIPLEINCLUDEOPT_H + +namespace clang { +class IdentifierInfo; + +/// MultipleIncludeOpt - This class implements the simple state machine that the +/// Lexer class uses to detect files subject to the 'multiple-include' +/// optimization. The public methods in this class are triggered by various +/// events that occur when a file is lexed, and after the entire file is lexed, +/// information about which macro (if any) controls the header is returned. +class MultipleIncludeOpt { + /// ReadAnyTokens - This is set to false when a file is first opened and true + /// any time a token is returned to the client or a (non-multiple-include) + /// directive is parsed. When the final #endif is parsed this is reset back + /// to false, that way any tokens before the first #ifdef or after the last + /// #endif can be easily detected. + bool ReadAnyTokens; + + /// TheMacro - The controlling macro for a file, if valid. + /// + const IdentifierInfo *TheMacro; +public: + MultipleIncludeOpt() : ReadAnyTokens(false), TheMacro(0) {} + + /// Invalidate - Permanently mark this file as not being suitable for the + /// include-file optimization. + void Invalidate() { + // If we have read tokens but have no controlling macro, the state-machine + // below can never "accept". + ReadAnyTokens = true; + TheMacro = 0; + } + + /// getHasReadAnyTokensVal - This is used for the #ifndef handshake at the + /// top of the file when reading preprocessor directives. Otherwise, reading + /// the "ifndef x" would count as reading tokens. + bool getHasReadAnyTokensVal() const { return ReadAnyTokens; } + + // If a token is read, remember that we have seen a side-effect in this file. 
+ void ReadToken() { ReadAnyTokens = true; } + + /// EnterTopLevelIFNDEF - When entering a top-level #ifndef directive (or the + /// "#if !defined" equivalent) without any preceding tokens, this method is + /// called. + void EnterTopLevelIFNDEF(const IdentifierInfo *M) { + // Note, we don't care about the input value of 'ReadAnyTokens'. The caller + // ensures that this is only called if there are no tokens read before the + // #ifndef. + + // If the macro is already set, this is after the top-level #endif. + if (TheMacro) + return Invalidate(); + + // Remember that we're in the #if and that we have the macro. + ReadAnyTokens = true; + TheMacro = M; + } + + /// FoundTopLevelElse - This is invoked when an #else/#elif directive is found + /// in the top level conditional in the file. + void FoundTopLevelElse() { + /// If a #else directive is found at the top level, there is a chunk of the + /// file not guarded by the controlling macro. + Invalidate(); + } + + /// ExitTopLevelConditional - This method is called when the lexer exits the + /// top-level conditional. + void ExitTopLevelConditional() { + // If we have a macro, that means the top of the file was ok. Set our state + // back to "not having read any tokens" so we can detect anything after the + // #endif. + if (!TheMacro) return Invalidate(); + + // At this point, we haven't "read any tokens" but we do have a controlling + // macro. + ReadAnyTokens = false; + } + + /// GetControllingMacroAtEndOfFile - Once the entire file has been lexed, if + /// there is a controlling macro, return it. + const IdentifierInfo *GetControllingMacroAtEndOfFile() const { + // If we haven't read any tokens after the #endif, return the controlling + // macro if it's valid (if it isn't, it will be null). 
+ if (!ReadAnyTokens) + return TheMacro; + return 0; + } +}; + +} // end namespace clang + +#endif diff --git a/include/clang/Lex/PPCallbacks.h b/include/clang/Lex/PPCallbacks.h new file mode 100644 index 0000000000..def8072dac --- /dev/null +++ b/include/clang/Lex/PPCallbacks.h @@ -0,0 +1,53 @@ +//===--- PPCallbacks.h - Callbacks for Preprocessor actions -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the PPCallbacks interface. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LEX_PPCALLBACKS_H +#define LLVM_CLANG_LEX_PPCALLBACKS_H + +#include "clang/Lex/DirectoryLookup.h" +#include "clang/Basic/SourceLocation.h" +#include <string> + +namespace clang { + class SourceLocation; + +/// PPCallbacks - This interface provides a way to observe the actions of the +/// preprocessor as it does its thing. Clients can define their hooks here to +/// implement preprocessor level tools. +class PPCallbacks { +public: + virtual ~PPCallbacks(); + + enum FileChangeReason { + EnterFile, ExitFile, SystemHeaderPragma, RenameFile + }; + + /// FileChanged - This callback is invoked whenever a source file is + /// entered or exited. The SourceLocation indicates the new location, and + /// EnteringFile indicates whether this is because we are entering a new + /// #include'd file (when true) or whether we're exiting one because we ran + /// off the end (when false). + virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason, + DirectoryLookup::DirType FileType) { + } + + /// Ident - This callback is invoked when a #ident or #sccs directive is read. 
+ /// + virtual void Ident(SourceLocation Loc, const std::string &str) { + } + +}; + +} // end namespace clang + +#endif diff --git a/include/clang/Lex/Pragma.h b/include/clang/Lex/Pragma.h new file mode 100644 index 0000000000..5e35ae02de --- /dev/null +++ b/include/clang/Lex/Pragma.h @@ -0,0 +1,82 @@ +//===--- Pragma.h - Pragma registration and handling ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the PragmaHandler and PragmaNamespace interfaces. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_PRAGMA_H +#define LLVM_CLANG_PRAGMA_H + +#include <cassert> +#include <vector> + +namespace clang { + class Preprocessor; + class LexerToken; + class IdentifierInfo; + class PragmaNamespace; + +/// PragmaHandler - Instances of this interface are defined to handle the various +/// pragmas that the language front-end uses. Each handler optionally has a +/// name (e.g. "pack") and the HandlePragma method is invoked when a pragma with +/// that identifier is found. If a handler does not match any of the declared +/// pragmas the handler with a null identifier is invoked, if it exists. +/// +/// Note that the PragmaNamespace class can be used to subdivide pragmas, e.g. +/// we treat "#pragma STDC" and "#pragma GCC" as namespaces that contain other +/// pragmas. +class PragmaHandler { + const IdentifierInfo *Name; +public: + PragmaHandler(const IdentifierInfo *name) : Name(name) {} + virtual ~PragmaHandler(); + + const IdentifierInfo *getName() const { return Name; } + virtual void HandlePragma(Preprocessor &PP, LexerToken &FirstToken) = 0; + + /// getIfNamespace - If this is a namespace, return it. 
This is equivalent to + /// using a dynamic_cast, but doesn't require RTTI. + virtual PragmaNamespace *getIfNamespace() { return 0; } +}; + +/// PragmaNamespace - This PragmaHandler subdivides the namespace of pragmas, +/// allowing hierarchical pragmas to be defined. Common examples of namespaces +/// are "#pragma GCC", "#pragma STDC", and "#pragma omp", but any namespaces may +/// be (potentially recursively) defined. +class PragmaNamespace : public PragmaHandler { + /// Handlers - This is the list of handlers in this namespace. + /// + std::vector<PragmaHandler*> Handlers; +public: + PragmaNamespace(const IdentifierInfo *Name) : PragmaHandler(Name) {} + virtual ~PragmaNamespace(); + + /// FindHandler - Check to see if there is already a handler for the + /// specified name. If not, return the handler for the null identifier if it + /// exists, otherwise return null. If IgnoreNull is true (the default) then + /// the null handler isn't returned on failure to match. + PragmaHandler *FindHandler(const IdentifierInfo *Name, + bool IgnoreNull = true) const; + + /// AddPragma - Add a pragma to this namespace. + /// + void AddPragma(PragmaHandler *Handler) { + Handlers.push_back(Handler); + } + + virtual void HandlePragma(Preprocessor &PP, LexerToken &FirstToken); + + virtual PragmaNamespace *getIfNamespace() { return this; } +}; + + +} // end namespace clang + +#endif diff --git a/include/clang/Lex/Preprocessor.h b/include/clang/Lex/Preprocessor.h new file mode 100644 index 0000000000..e3aff76012 --- /dev/null +++ b/include/clang/Lex/Preprocessor.h @@ -0,0 +1,454 @@ +//===--- Preprocessor.h - C Language Family Preprocessor --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines the Preprocessor interface. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LEX_PREPROCESSOR_H +#define LLVM_CLANG_LEX_PREPROCESSOR_H + +#include "clang/Lex/IdentifierTable.h" +#include "clang/Lex/Lexer.h" +#include "clang/Lex/MacroExpander.h" +#include "clang/Basic/SourceLocation.h" + +namespace clang { + +class SourceManager; +class FileManager; +class FileEntry; +class HeaderSearch; +class PragmaNamespace; +class PragmaHandler; +class ScratchBuffer; +class TargetInfo; +class PPCallbacks; +class DirectoryLookup; + +/// Preprocessor - This object engages in a tight little dance to +/// efficiently preprocess tokens. Lexers know only about tokens within a +/// single source file, and don't know anything about preprocessor-level issues +/// like the #include stack, token expansion, etc. +/// +class Preprocessor { + Diagnostic &Diags; + const LangOptions &Features; + TargetInfo &Target; + FileManager &FileMgr; + SourceManager &SourceMgr; + ScratchBuffer *ScratchBuf; + HeaderSearch &HeaderInfo; + + /// Identifiers for builtin macros and other builtins. + IdentifierInfo *Ident__LINE__, *Ident__FILE__; // __LINE__, __FILE__ + IdentifierInfo *Ident__DATE__, *Ident__TIME__; // __DATE__, __TIME__ + IdentifierInfo *Ident__INCLUDE_LEVEL__; // __INCLUDE_LEVEL__ + IdentifierInfo *Ident__BASE_FILE__; // __BASE_FILE__ + IdentifierInfo *Ident__TIMESTAMP__; // __TIMESTAMP__ + IdentifierInfo *Ident_Pragma, *Ident__VA_ARGS__; // _Pragma, __VA_ARGS__ + + SourceLocation DATELoc, TIMELoc; + + enum { + /// MaxAllowedIncludeStackDepth - Maximum depth of #includes. + MaxAllowedIncludeStackDepth = 200 + }; + + // State that is set before the preprocessor begins. 
+ bool KeepComments : 1; + bool KeepMacroComments : 1; + + // State that changes while the preprocessor runs: + bool DisableMacroExpansion : 1; // True if macro expansion is disabled. + bool InMacroArgs : 1; // True if parsing fn macro invocation args. + + /// Identifiers - This is mapping/lookup information for all identifiers in + /// the program, including program keywords. + IdentifierTable Identifiers; + + /// PragmaHandlers - This tracks all of the pragmas that the client registered + /// with this preprocessor. + PragmaNamespace *PragmaHandlers; + + /// CurLexer - This is the current top of the stack that we're lexing from if + /// not expanding a macro. One of CurLexer and CurMacroExpander must be null. + Lexer *CurLexer; + + /// CurLookup - The DirectoryLookup structure used to find the current + /// FileEntry, if CurLexer is non-null and if applicable. This allows us to + /// implement #include_next and find directory-specific properties. + const DirectoryLookup *CurDirLookup; + + /// CurMacroExpander - This is the current macro we are expanding, if we are + /// expanding a macro. One of CurLexer and CurMacroExpander must be null. + MacroExpander *CurMacroExpander; + + /// IncludeMacroStack - This keeps track of the stack of files currently + /// #included, and macros currently being expanded from, not counting + /// CurLexer/CurMacroExpander. + struct IncludeStackInfo { + Lexer *TheLexer; + const DirectoryLookup *TheDirLookup; + MacroExpander *TheMacroExpander; + IncludeStackInfo(Lexer *L, const DirectoryLookup *D, MacroExpander *M) + : TheLexer(L), TheDirLookup(D), TheMacroExpander(M) { + } + }; + std::vector<IncludeStackInfo> IncludeMacroStack; + + /// Callbacks - These are actions invoked when some preprocessor activity is + /// encountered (e.g. a file is #included, etc). + PPCallbacks *Callbacks; + + // Various statistics we track for performance analysis. 
+ unsigned NumDirectives, NumIncluded, NumDefined, NumUndefined, NumPragma; + unsigned NumIf, NumElse, NumEndif; + unsigned NumEnteredSourceFiles, MaxIncludeStackDepth; + unsigned NumMacroExpanded, NumFnMacroExpanded, NumBuiltinMacroExpanded; + unsigned NumFastMacroExpanded, NumTokenPaste, NumFastTokenPaste; + unsigned NumSkipped; +public: + Preprocessor(Diagnostic &diags, const LangOptions &opts, TargetInfo &target, + SourceManager &SM, HeaderSearch &Headers); + ~Preprocessor(); + + Diagnostic &getDiagnostics() const { return Diags; } + const LangOptions &getLangOptions() const { return Features; } + TargetInfo &getTargetInfo() const { return Target; } + FileManager &getFileManager() const { return FileMgr; } + SourceManager &getSourceManager() const { return SourceMgr; } + HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; } + + IdentifierTable &getIdentifierTable() { return Identifiers; } + + /// SetCommentRetentionState - Control whether or not the preprocessor retains + /// comments in output. + void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) { + this->KeepComments = KeepComments | KeepMacroComments; + this->KeepMacroComments = KeepMacroComments; + } + + bool getCommentRetentionState() const { return KeepComments; } + + /// isCurrentLexer - Return true if we are lexing directly from the specified + /// lexer. + bool isCurrentLexer(const Lexer *L) const { + return CurLexer == L; + } + + /// isInPrimaryFile - Return true if we're in the top-level file, not in a + /// #include. + bool isInPrimaryFile() const; + + /// getCurrentFileLexer - Return the current file lexer being lexed from. Note + /// that this ignores any potentially active macro expansions and _Pragma + /// expansions going on at the time. + Lexer *getCurrentFileLexer() const; + + /// getPPCallbacks/setPPCallbacks - Accessors for preprocessor callbacks. 
+ /// + PPCallbacks *getPPCallbacks() const { return Callbacks; } + void setPPCallbacks(PPCallbacks *C) { + Callbacks = C; + } + + /// getIdentifierInfo - Return information about the specified preprocessor + /// identifier token. The version of this method that takes two character + /// pointers is preferred unless the identifier is already available as a + /// string (this avoids allocation and copying of memory to construct an + /// std::string). + IdentifierInfo *getIdentifierInfo(const char *NameStart, + const char *NameEnd) { + return &Identifiers.get(NameStart, NameEnd); + } + IdentifierInfo *getIdentifierInfo(const char *NameStr) { + return getIdentifierInfo(NameStr, NameStr+strlen(NameStr)); + } + + /// AddPragmaHandler - Add the specified pragma handler to the preprocessor. + /// If 'Namespace' is non-null, then it is a token required to exist on the + /// pragma line before the pragma string starts, e.g. "STDC" or "GCC". + void AddPragmaHandler(const char *Namespace, PragmaHandler *Handler); + + /// EnterSourceFile - Add a source file to the top of the include stack and + /// start lexing tokens from it instead of the current buffer. If isMainFile + /// is true, this is the main file for the translation unit. + void EnterSourceFile(unsigned CurFileID, const DirectoryLookup *Dir, + bool isMainFile = false); + + /// EnterMacro - Add a Macro to the top of the include stack and start lexing + /// tokens from it instead of the current buffer. Args specifies the + /// tokens input to a function-like macro. + void EnterMacro(LexerToken &Identifier, MacroArgs *Args); + + /// EnterTokenStream - Add a "macro" context to the top of the include stack, + /// which will cause the lexer to start returning the specified tokens. Note + /// that these tokens will be re-macro-expanded when/if expansion is enabled. + /// This method assumes that the specified stream of tokens has a permanent + /// owner somewhere, so they do not need to be copied. 
+ void EnterTokenStream(const LexerToken *Toks, unsigned NumToks); + + /// RemoveTopOfLexerStack - Pop the current lexer/macro exp off the top of the + /// lexer stack. This should only be used in situations where the current + /// state of the top-of-stack lexer is known. + void RemoveTopOfLexerStack(); + + /// Lex - To lex a token from the preprocessor, just pull a token from the + /// current lexer or macro object. + void Lex(LexerToken &Result) { + if (CurLexer) + CurLexer->Lex(Result); + else + CurMacroExpander->Lex(Result); + } + + /// LexNonComment - Lex a token. If it's a comment, keep lexing until we get + /// something not a comment. This is useful in -E -C mode where comments + /// would foul up preprocessor directive handling. + void LexNonComment(LexerToken &Result) { + do + Lex(Result); + while (Result.getKind() == tok::comment); + } + + /// LexUnexpandedToken - This is just like Lex, but this disables macro + /// expansion of identifier tokens. + void LexUnexpandedToken(LexerToken &Result) { + // Disable macro expansion. + bool OldVal = DisableMacroExpansion; + DisableMacroExpansion = true; + // Lex the token. + Lex(Result); + + // Reenable it. + DisableMacroExpansion = OldVal; + } + + /// Diag - Forwarding function for diagnostics. This emits a diagnostic at + /// the specified LexerToken's location, translating the token's start + /// position in the current buffer into a SourcePosition object for rendering. + void Diag(SourceLocation Loc, unsigned DiagID); + void Diag(SourceLocation Loc, unsigned DiagID, const std::string &Msg); + void Diag(const LexerToken &Tok, unsigned DiagID) { + Diag(Tok.getLocation(), DiagID); + } + void Diag(const LexerToken &Tok, unsigned DiagID, const std::string &Msg) { + Diag(Tok.getLocation(), DiagID, Msg); + } + + /// getSpelling() - Return the 'spelling' of the Tok token. 
The spelling of a + /// token is the characters used to represent the token in the source file + /// after trigraph expansion and escaped-newline folding. In particular, this + /// wants to get the true, uncanonicalized, spelling of things like digraphs + /// UCNs, etc. + std::string getSpelling(const LexerToken &Tok) const; + + /// getSpelling - This method is used to get the spelling of a token into a + /// preallocated buffer, instead of as an std::string. The caller is required + /// to allocate enough space for the token, which is guaranteed to be at least + /// Tok.getLength() bytes long. The length of the actual result is returned. + /// + /// Note that this method may do two possible things: it may either fill in + /// the buffer specified with characters, or it may *change the input pointer* + /// to point to a constant buffer with the data already in it (avoiding a + /// copy). The caller is not allowed to modify the returned buffer pointer + /// if an internal buffer is returned. + unsigned getSpelling(const LexerToken &Tok, const char *&Buffer) const; + + + /// CreateString - Plop the specified string into a scratch buffer and return + /// a location for it. If specified, the source location provides a source + /// location for the token. + SourceLocation CreateString(const char *Buf, unsigned Len, + SourceLocation SourceLoc = SourceLocation()); + + /// DumpToken - Print the token to stderr, used for debugging. + /// + void DumpToken(const LexerToken &Tok, bool DumpFlags = false) const; + void DumpMacro(const MacroInfo &MI) const; + + /// IncrementPasteCounter - Increment the counters for the number of token + /// paste operations performed. If fast was specified, this is a 'fast paste' + /// case we handled. 
+ /// + void IncrementPasteCounter(bool isFast) { + if (isFast) + ++NumFastTokenPaste; + else + ++NumTokenPaste; + } + + void PrintStats(); + + //===--------------------------------------------------------------------===// + // Preprocessor callback methods. These are invoked by a lexer as various + // directives and events are found. + + /// LookUpIdentifierInfo - Given a tok::identifier token, look up the + /// identifier information for the token and install it into the token. + IdentifierInfo *LookUpIdentifierInfo(LexerToken &Identifier, + const char *BufPtr = 0); + + /// HandleIdentifier - This callback is invoked when the lexer reads an + /// identifier and has filled in the tokens IdentifierInfo member. This + /// callback potentially macro expands it or turns it into a named token (like + /// 'for'). + void HandleIdentifier(LexerToken &Identifier); + + + /// HandleEndOfFile - This callback is invoked when the lexer hits the end of + /// the current file. This either returns the EOF token and returns true, or + /// pops a level off the include stack and returns false, at which point the + /// client should call lex again. + bool HandleEndOfFile(LexerToken &Result, bool isEndOfMacro = false); + + /// HandleEndOfMacro - This callback is invoked when the lexer hits the end of + /// the current macro line. It returns true if Result is filled in with a + /// token, or false if Lex should be called again. + bool HandleEndOfMacro(LexerToken &Result); + + /// HandleDirective - This callback is invoked when the lexer sees a # token + /// at the start of a line. This consumes the directive, modifies the + /// lexer/preprocessor state, and advances the lexer(s) so that the next token + /// read is the correct one. + void HandleDirective(LexerToken &Result); + + /// CheckEndOfDirective - Ensure that the next token is a tok::eom token. If + /// not, emit a diagnostic and consume up until the eom. 
+ void CheckEndOfDirective(const char *Directive); +private: + + /// DiscardUntilEndOfDirective - Read and discard all tokens remaining on the + /// current line until the tok::eom token is found. + void DiscardUntilEndOfDirective(); + + /// ReadMacroName - Lex and validate a macro name, which occurs after a + /// #define or #undef. This emits a diagnostic, sets the token kind to eom, + /// and discards the rest of the macro line if the macro name is invalid. + void ReadMacroName(LexerToken &MacroNameTok, char isDefineUndef = 0); + + /// ReadMacroDefinitionArgList - The ( starting an argument list of a macro + /// definition has just been read. Lex the rest of the arguments and the + /// closing ), updating MI with what we learn. Return true if an error occurs + /// parsing the arg list. + bool ReadMacroDefinitionArgList(MacroInfo *MI); + + /// SkipExcludedConditionalBlock - We just read a #if or related directive and + /// decided that the subsequent tokens are in the #if'd out portion of the + /// file. Lex the rest of the file, until we see an #endif. If + /// FoundNonSkipPortion is true, then we have already emitted code for part of + /// this #if directive, so #else/#elif blocks should never be entered. If + /// FoundElse is false, then #else directives are ok, if not, then we have + /// already seen one so a #else directive is a duplicate. When this returns, + /// the caller can lex the first valid token. + void SkipExcludedConditionalBlock(SourceLocation IfTokenLoc, + bool FoundNonSkipPortion, bool FoundElse); + + /// EvaluateDirectiveExpression - Evaluate an integer constant expression that + /// may occur after a #if or #elif directive and return it as a bool. If the + /// expression is equivalent to "!defined(X)" return X in IfNDefMacro. + bool EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro); + + /// RegisterBuiltinPragmas - Install the standard preprocessor pragmas: + /// #pragma GCC poison/system_header/dependency and #pragma once. 
+ void RegisterBuiltinPragmas(); + + /// RegisterBuiltinMacros - Register builtin macros, such as __LINE__ with the + /// identifier table. + void RegisterBuiltinMacros(); + IdentifierInfo *RegisterBuiltinMacro(const char *Name); + + /// HandleMacroExpandedIdentifier - If an identifier token is read that is to + /// be expanded as a macro, handle it and return the next token as 'Tok'. If + /// the macro should not be expanded return true, otherwise return false. + bool HandleMacroExpandedIdentifier(LexerToken &Tok, MacroInfo *MI); + + /// isNextPPTokenLParen - Determine whether the next preprocessor token to be + /// lexed is a '('. If so, consume the token and return true, if not, this + /// method should have no observable side-effect on the lexed tokens. + bool isNextPPTokenLParen(); + + /// ReadFunctionLikeMacroArgs - After reading "MACRO(", this method is + /// invoked to read all of the formal arguments specified for the macro + /// invocation. This returns null on error. + MacroArgs *ReadFunctionLikeMacroArgs(LexerToken &MacroName, MacroInfo *MI); + + /// ExpandBuiltinMacro - If an identifier token is read that is to be expanded + /// as a builtin macro, handle it and return the next token as 'Tok'. + void ExpandBuiltinMacro(LexerToken &Tok); + + /// Handle_Pragma - Read a _Pragma directive, slice it up, process it, then + /// return the first token after the directive. The _Pragma token has just + /// been read into 'Tok'. + void Handle_Pragma(LexerToken &Tok); + + + /// EnterSourceFileWithLexer - Add a lexer to the top of the include stack and + /// start lexing tokens from it instead of the current buffer. + void EnterSourceFileWithLexer(Lexer *TheLexer, const DirectoryLookup *Dir); + + /// GetIncludeFilenameSpelling - Turn the specified lexer token into a fully + /// checked and spelled filename, e.g. as an operand of #include. This returns + /// true if the input filename was in <>'s or false if it were in ""'s. 
The + /// caller is expected to provide a buffer that is large enough to hold the + /// spelling of the filename, but is also expected to handle the case when + /// this method decides to use a different buffer. + bool GetIncludeFilenameSpelling(const LexerToken &FNTok, + const char *&BufStart, const char *&BufEnd); + + /// LookupFile - Given a "foo" or <foo> reference, look up the indicated file, + /// return null on failure. isAngled indicates whether the file reference is + /// for system #include's or not (i.e. using <> instead of ""). + const FileEntry *LookupFile(const char *FilenameStart,const char *FilenameEnd, + bool isAngled, const DirectoryLookup *FromDir, + const DirectoryLookup *&CurDir); + + //===--------------------------------------------------------------------===// + /// Handle*Directive - implement the various preprocessor directives. These + /// should side-effect the current preprocessor object so that the next call + /// to Lex() will return the appropriate token next. + + void HandleUserDiagnosticDirective(LexerToken &Tok, bool isWarning); + void HandleIdentSCCSDirective(LexerToken &Tok); + + // File inclusion. + void HandleIncludeDirective(LexerToken &Tok, + const DirectoryLookup *LookupFrom = 0, + bool isImport = false); + void HandleIncludeNextDirective(LexerToken &Tok); + void HandleImportDirective(LexerToken &Tok); + + // Macro handling. + void HandleDefineDirective(LexerToken &Tok, bool isTargetSpecific); + void HandleUndefDirective(LexerToken &Tok); + void HandleDefineOtherTargetDirective(LexerToken &Tok); + // HandleAssertDirective(LexerToken &Tok); + // HandleUnassertDirective(LexerToken &Tok); + + // Conditional Inclusion. 
+ void HandleIfdefDirective(LexerToken &Tok, bool isIfndef, + bool ReadAnyTokensBeforeDirective); + void HandleIfDirective(LexerToken &Tok, bool ReadAnyTokensBeforeDirective); + void HandleEndifDirective(LexerToken &Tok); + void HandleElseDirective(LexerToken &Tok); + void HandleElifDirective(LexerToken &Tok); + + // Pragmas. + void HandlePragmaDirective(); +public: + void HandlePragmaOnce(LexerToken &OnceTok); + void HandlePragmaPoison(LexerToken &PoisonTok); + void HandlePragmaSystemHeader(LexerToken &SysHeaderTok); + void HandlePragmaDependency(LexerToken &DependencyTok); +}; + +} // end namespace clang + +#endif diff --git a/include/clang/Lex/ScratchBuffer.h b/include/clang/Lex/ScratchBuffer.h new file mode 100644 index 0000000000..e2d62d19c7 --- /dev/null +++ b/include/clang/Lex/ScratchBuffer.h @@ -0,0 +1,50 @@ +//===--- ScratchBuffer.h - Scratch space for forming tokens -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the ScratchBuffer interface. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_SCRATCHBUFFER_H +#define LLVM_CLANG_SCRATCHBUFFER_H + +namespace clang { + class SourceManager; + class SourceLocation; + +/// ScratchBuffer - This class exposes a simple interface for the dynamic +/// construction of tokens. This is used for builtin macros (e.g. __LINE__) as +/// well as token pasting, etc. +class ScratchBuffer { + SourceManager &SourceMgr; + char *CurBuffer; + unsigned FileID; + unsigned BytesUsed; +public: + ScratchBuffer(SourceManager &SM); + + /// getToken - Splat the specified text into a temporary MemoryBuffer and + /// return a SourceLocation that refers to the token. 
The SourceLoc value + /// gives a virtual location that the token will appear to be from. + SourceLocation getToken(const char *Buf, unsigned Len, + SourceLocation SourceLoc); + + /// getToken - Splat the specified text into a temporary MemoryBuffer and + /// return a SourceLocation that refers to the token. This is just like the + /// previous method, but returns a location that indicates the physloc of the + /// token. + SourceLocation getToken(const char *Buf, unsigned Len); + +private: + void AllocScratchBuffer(unsigned RequestLen); +}; + +} // end namespace clang + +#endif diff --git a/include/clang/Parse/Action.h b/include/clang/Parse/Action.h new file mode 100644 index 0000000000..bd7fbe4e5e --- /dev/null +++ b/include/clang/Parse/Action.h @@ -0,0 +1,404 @@ +//===--- Action.h - Parser Action Interface ---------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Action and MinimalAction interface. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_PARSE_ACTION_H +#define LLVM_CLANG_PARSE_ACTION_H + +#include "clang/Basic/SourceLocation.h" +#include "clang/Basic/TokenKinds.h" + +namespace clang { + // Semantic. + class DeclSpec; + class Declarator; + class AttributeList; + // Parse. + class Scope; + class Action; + // Lex. + class IdentifierInfo; + class LexerToken; + +/// Action - As the parser reads the input file and recognizes the productions +/// of the grammar, it invokes methods on this class to turn the parsed input +/// into something useful: e.g. a parse tree. 
+/// +/// The callback methods that this class provides are phrased as actions that +/// the parser has just done or is about to do when the method is called. They +/// are not requests that the actions module do the specified action. +/// +/// All of the methods here are optional except isTypeName(), which must be +/// specified in order for the parse to complete accurately. The MinimalAction +/// class does this bare-minimum of tracking to implement this functionality. +class Action { +public: + /// Out-of-line virtual destructor to provide a home for this class. + virtual ~Action(); + + // Types - Though these don't actually enforce strong typing, they document + // what types are required to be identical for the actions. + typedef void ExprTy; + typedef void StmtTy; + typedef void DeclTy; + typedef void TypeTy; + typedef void AttrTy; + + /// ActionResult - This structure is used while parsing/acting on expressions, + /// stmts, etc. It encapsulates both the object returned by the action, plus + /// a sense of whether or not it is valid. + template<unsigned UID> + struct ActionResult { + void *Val; + bool isInvalid; + + ActionResult(bool Invalid = false) : Val(0), isInvalid(Invalid) {} + template<typename ActualExprTy> + ActionResult(ActualExprTy *val) : Val(val), isInvalid(false) {} + + const ActionResult &operator=(void *RHS) { + Val = RHS; + isInvalid = false; + return *this; + } + }; + + /// Expr/Stmt/TypeResult - Provide a unique type to wrap ExprTy/StmtTy/TypeTy, + /// providing strong typing and allowing for failure. + typedef ActionResult<0> ExprResult; + typedef ActionResult<1> StmtResult; + typedef ActionResult<2> TypeResult; + + //===--------------------------------------------------------------------===// + // Declaration Tracking Callbacks. + //===--------------------------------------------------------------------===// + + /// isTypeName - Return non-null if the specified identifier is a typedef name + /// in the current scope. 
+ virtual DeclTy *isTypeName(const IdentifierInfo &II, Scope *S) const = 0; + + /// ParseDeclarator - This callback is invoked when a declarator is parsed and + /// 'Init' specifies the initializer if any. This is for things like: + /// "int X = 4" or "typedef int foo". + /// + /// LastInGroup is non-null for cases where one declspec has multiple + /// declarators on it. For example in 'int A, B', ParseDeclarator will be + /// called with LastInGroup=A when invoked for B. + virtual DeclTy *ParseDeclarator(Scope *S, Declarator &D, + ExprTy *Init, DeclTy *LastInGroup) { + return 0; + } + + /// FinalizeDeclaratorGroup - After a sequence of declarators are parsed, this + /// gives the actions implementation a chance to process the group as a whole. + virtual DeclTy *FinalizeDeclaratorGroup(Scope *S, DeclTy *Group) { + return Group; + } + + /// ParseStartOfFunctionDef - This is called at the start of a function + /// definition, instead of calling ParseDeclarator. The Declarator includes + /// information about formal arguments that are part of this function. + virtual DeclTy *ParseStartOfFunctionDef(Scope *FnBodyScope, Declarator &D) { + // Default to ParseDeclarator. + return ParseDeclarator(FnBodyScope, D, 0, 0); + } + + /// ParseFunctionDefBody - This is called when a function body has completed + /// parsing. Decl is the DeclTy returned by ParseStartOfFunctionDef. + virtual DeclTy *ParseFunctionDefBody(DeclTy *Decl, StmtTy *Body) { + return Decl; + } + + + /// PopScope - This callback is called immediately before the specified scope + /// is popped and deleted. + virtual void PopScope(SourceLocation Loc, Scope *S) {} + + /// ParsedFreeStandingDeclSpec - This method is invoked when a declspec with + /// no declarator (e.g. "struct foo;") is parsed. 
+ virtual DeclTy *ParsedFreeStandingDeclSpec(Scope *S, DeclSpec &DS) { + return 0; + } + + virtual DeclTy *ParsedObjcClassDeclaration(Scope *S, + IdentifierInfo **IdentList, + unsigned NumElts) { + return 0; + } + + //===--------------------------------------------------------------------===// + // Type Parsing Callbacks. + //===--------------------------------------------------------------------===// + + virtual TypeResult ParseTypeName(Scope *S, Declarator &D) { + return 0; + } + + virtual TypeResult ParseParamDeclaratorType(Scope *S, Declarator &D) { + return 0; + } + + enum TagKind { + TK_Reference, // Reference to a tag: 'struct foo *X;' + TK_Declaration, // Fwd decl of a tag: 'struct foo;' + TK_Definition // Definition of a tag: 'struct foo { int X; } Y;' + }; + virtual DeclTy *ParseTag(Scope *S, unsigned TagType, TagKind TK, + SourceLocation KWLoc, IdentifierInfo *Name, + SourceLocation NameLoc, AttributeList *Attr) { + // TagType is an instance of DeclSpec::TST, indicating what kind of tag this + // is (struct/union/enum/class). + return 0; + } + + virtual DeclTy *ParseField(Scope *S, DeclTy *TagDecl,SourceLocation DeclStart, + Declarator &D, ExprTy *BitfieldWidth) { + return 0; + } + virtual void ParseRecordBody(SourceLocation RecLoc, DeclTy *TagDecl, + DeclTy **Fields, unsigned NumFields) {} + + virtual DeclTy *ParseEnumConstant(Scope *S, DeclTy *EnumDecl, + DeclTy *LastEnumConstant, + SourceLocation IdLoc, IdentifierInfo *Id, + SourceLocation EqualLoc, ExprTy *Val) { + return 0; + } + virtual void ParseEnumBody(SourceLocation EnumLoc, DeclTy *EnumDecl, + DeclTy **Elements, unsigned NumElements) {} + + //===--------------------------------------------------------------------===// + // Statement Parsing Callbacks. 
+ //===--------------------------------------------------------------------===// + + virtual StmtResult ParseNullStmt(SourceLocation SemiLoc) { + return 0; + } + + virtual StmtResult ParseCompoundStmt(SourceLocation L, SourceLocation R, + StmtTy **Elts, unsigned NumElts) { + return 0; + } + virtual StmtResult ParseDeclStmt(DeclTy *Decl) { + return 0; + } + + virtual StmtResult ParseExprStmt(ExprTy *Expr) { + return StmtResult(Expr); + } + + /// ParseCaseStmt - Note that this handles the GNU 'case 1 ... 4' extension, + /// which can specify an RHS value. + virtual StmtResult ParseCaseStmt(SourceLocation CaseLoc, ExprTy *LHSVal, + SourceLocation DotDotDotLoc, ExprTy *RHSVal, + SourceLocation ColonLoc, StmtTy *SubStmt) { + return 0; + } + virtual StmtResult ParseDefaultStmt(SourceLocation DefaultLoc, + SourceLocation ColonLoc, StmtTy *SubStmt){ + return 0; + } + + virtual StmtResult ParseLabelStmt(SourceLocation IdentLoc, IdentifierInfo *II, + SourceLocation ColonLoc, StmtTy *SubStmt) { + return 0; + } + + virtual StmtResult ParseIfStmt(SourceLocation IfLoc, ExprTy *CondVal, + StmtTy *ThenVal, SourceLocation ElseLoc, + StmtTy *ElseVal) { + return 0; + } + + virtual StmtResult ParseSwitchStmt(SourceLocation SwitchLoc, ExprTy *Cond, + StmtTy *Body) { + return 0; + } + virtual StmtResult ParseWhileStmt(SourceLocation WhileLoc, ExprTy *Cond, + StmtTy *Body) { + return 0; + } + virtual StmtResult ParseDoStmt(SourceLocation DoLoc, StmtTy *Body, + SourceLocation WhileLoc, ExprTy *Cond) { + return 0; + } + virtual StmtResult ParseForStmt(SourceLocation ForLoc, + SourceLocation LParenLoc, + StmtTy *First, ExprTy *Second, ExprTy *Third, + SourceLocation RParenLoc, StmtTy *Body) { + return 0; + } + virtual StmtResult ParseGotoStmt(SourceLocation GotoLoc, + SourceLocation LabelLoc, + IdentifierInfo *LabelII) { + return 0; + } + virtual StmtResult ParseIndirectGotoStmt(SourceLocation GotoLoc, + SourceLocation StarLoc, + ExprTy *DestExp) { + return 0; + } + virtual StmtResult 
ParseContinueStmt(SourceLocation ContinueLoc, + Scope *CurScope) { + return 0; + } + virtual StmtResult ParseBreakStmt(SourceLocation GotoLoc, Scope *CurScope) { + return 0; + } + virtual StmtResult ParseReturnStmt(SourceLocation ReturnLoc, + ExprTy *RetValExp) { + return 0; + } + + //===--------------------------------------------------------------------===// + // Expression Parsing Callbacks. + //===--------------------------------------------------------------------===// + + // Primary Expressions. + + /// ParseIdentifierExpr - Parse an identifier in expression context. + /// 'HasTrailingLParen' indicates whether or not the identifier has a '(' + /// token immediately after it. + virtual ExprResult ParseIdentifierExpr(Scope *S, SourceLocation Loc, + IdentifierInfo &II, + bool HasTrailingLParen) { + return 0; + } + + virtual ExprResult ParseSimplePrimaryExpr(SourceLocation Loc, + tok::TokenKind Kind) { + return 0; + } + virtual ExprResult ParseCharacterConstant(const LexerToken &) { return 0; } + virtual ExprResult ParseNumericConstant(const LexerToken &) { return 0; } + + /// ParseStringLiteral - The specified tokens were lexed as pasted string + /// fragments (e.g. "foo" "bar" L"baz"). + virtual ExprResult ParseStringLiteral(const LexerToken *Toks, unsigned NumToks) { + return 0; + } + + virtual ExprResult ParseParenExpr(SourceLocation L, SourceLocation R, + ExprTy *Val) { + return Val; // Default impl returns operand. + } + + // Postfix Expressions. + virtual ExprResult ParsePostfixUnaryOp(SourceLocation OpLoc, + tok::TokenKind Kind, ExprTy *Input) { + return 0; + } + virtual ExprResult ParseArraySubscriptExpr(ExprTy *Base, SourceLocation LLoc, + ExprTy *Idx, SourceLocation RLoc) { + return 0; + } + virtual ExprResult ParseMemberReferenceExpr(ExprTy *Base,SourceLocation OpLoc, + tok::TokenKind OpKind, + SourceLocation MemberLoc, + IdentifierInfo &Member) { + return 0; + } + + /// ParseCallExpr - Handle a call to Fn with the specified array of arguments. 
+ /// This provides the location of the left/right parens and a list of comma + /// locations. There are guaranteed to be one fewer commas than arguments, + /// unless there are zero arguments. + virtual ExprResult ParseCallExpr(ExprTy *Fn, SourceLocation LParenLoc, + ExprTy **Args, unsigned NumArgs, + SourceLocation *CommaLocs, + SourceLocation RParenLoc) { + return 0; + } + + // Unary Operators. 'Op' is the token kind of the operator. + virtual ExprResult ParseUnaryOp(SourceLocation OpLoc, tok::TokenKind Op, + ExprTy *Input) { + return 0; + } + virtual ExprResult + ParseSizeOfAlignOfTypeExpr(SourceLocation OpLoc, bool isSizeof, + SourceLocation LParenLoc, TypeTy *Ty, + SourceLocation RParenLoc) { + return 0; + } + + virtual ExprResult ParseCastExpr(SourceLocation LParenLoc, TypeTy *Ty, + SourceLocation RParenLoc, ExprTy *Op) { + return 0; + } + + virtual ExprResult ParseBinOp(SourceLocation TokLoc, tok::TokenKind Kind, + ExprTy *LHS, ExprTy *RHS) { + return 0; + } + + /// ParseConditionalOp - Parse a ?: operation. Note that 'LHS' may be null + /// in the case of the GNU conditional expr extension. + virtual ExprResult ParseConditionalOp(SourceLocation QuestionLoc, + SourceLocation ColonLoc, + ExprTy *Cond, ExprTy *LHS, ExprTy *RHS){ + return 0; + } + + virtual ExprResult ParseAddrLabel(SourceLocation OpLoc, SourceLocation LabLoc, + IdentifierInfo *LabelII) { // "&&foo" + return 0; + } + + + /// ParseCXXCasts - Parse {dynamic,static,reinterpret,const}_cast's. + virtual ExprResult ParseCXXCasts(SourceLocation OpLoc, tok::TokenKind Kind, + SourceLocation LAngleBracketLoc, TypeTy *Ty, + SourceLocation RAngleBracketLoc, + SourceLocation LParenLoc, ExprTy *Op, + SourceLocation RParenLoc) { + return 0; + } + + /// ParseCXXBoolLiteral - Parse {true,false} literals. 
+ virtual ExprResult ParseCXXBoolLiteral(SourceLocation OpLoc, + tok::TokenKind Kind) { + return 0; + } +}; + +/// MinimalAction - Minimal actions are used by light-weight clients of the +/// parser that do not need name resolution or significant semantic analysis to +/// be performed. The actions implemented here are in the form of unresolved +/// identifiers. By using a simpler interface than the SemanticAction class, +/// the parser doesn't have to build complex data structures and thus runs more +/// quickly. +class MinimalAction : public Action { +public: + /// isTypeName - This looks at the IdentifierInfo::FETokenInfo field to + /// determine whether the name is a typedef or not in this scope. + virtual DeclTy *isTypeName(const IdentifierInfo &II, Scope *S) const; + + /// ParseDeclarator - If this is a typedef declarator, we modify the + /// IdentifierInfo::FETokenInfo field to keep track of this fact, until S is + /// popped. + virtual DeclTy *ParseDeclarator(Scope *S, Declarator &D, ExprTy *Init, + DeclTy *LastInGroup); + + /// PopScope - When a scope is popped, if any typedefs are now out-of-scope, + /// they are removed from the IdentifierInfo::FETokenInfo field. + virtual void PopScope(SourceLocation Loc, Scope *S); + + virtual DeclTy *ParsedObjcClassDeclaration(Scope *S, + IdentifierInfo **IdentList, + unsigned NumElts); + +}; + +} // end namespace clang + +#endif diff --git a/include/clang/Parse/AttributeList.h b/include/clang/Parse/AttributeList.h new file mode 100644 index 0000000000..8bde7d08fd --- /dev/null +++ b/include/clang/Parse/AttributeList.h @@ -0,0 +1,86 @@ +//===--- AttributeList.h ----------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Steve Naroff and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines the AttributeList class interface. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_ATTRLIST_H +#define LLVM_CLANG_ATTRLIST_H + +#include "clang/Parse/Action.h" +#include <cassert> + +namespace clang { + +/// AttributeList - Represents GCC's __attribute__ declaration. There are +/// 4 forms of this construct...they are: +/// +/// 1: __attribute__(( const )). ParmName/Args/NumArgs will all be unused. +/// 2: __attribute__(( mode(byte) )). ParmName used, Args/NumArgs unused. +/// 3: __attribute__(( format(printf, 1, 2) )). ParmName/Args/NumArgs all used. +/// 4: __attribute__(( aligned(16) )). ParmName is unused, Args/NumArgs used. +/// +class AttributeList { + IdentifierInfo *AttrName; + SourceLocation AttrLoc; + IdentifierInfo *ParmName; + SourceLocation ParmLoc; + Action::ExprTy **Args; + unsigned NumArgs; + AttributeList *Next; +public: + AttributeList(IdentifierInfo *AttrName, SourceLocation AttrLoc, + IdentifierInfo *ParmName, SourceLocation ParmLoc, + Action::ExprTy **args, unsigned numargs, AttributeList *Next); + ~AttributeList() { + if (Args) { + // FIXME: before we delete the vector, we need to make sure the Expr's + // have been deleted. Since Action::ExprTy is "void", we are dependent + // on the actions module for actually freeing the memory. The specific + // hooks are ParseDeclarator, ParseTypeName, ParseParamDeclaratorType, + // ParseField, ParseTag. Once these routines have freed the expression, + // they should zero out the Args slot (to indicate the memory has been + // freed). If any element of the vector is non-null, we should assert. 
+ delete [] Args; + } + if (Next) + delete Next; + } + + IdentifierInfo *getAttributeName() const { return AttrName; } + SourceLocation getAttributeLoc() const { return AttrLoc; } + IdentifierInfo *getParameterName() const { return ParmName; } + + AttributeList *getNext() const { return Next; } + void setNext(AttributeList *N) { Next = N; } + + void addAttributeList(AttributeList *alist) { + assert((alist != 0) && "addAttributeList(): alist is null"); + AttributeList *next = this, *prev; + do { + prev = next; + next = next->getNext(); + } while (next); + prev->setNext(alist); + } + + /// getNumArgs - Return the number of actual arguments to this attribute. + unsigned getNumArgs() const { return NumArgs; } + + /// getArg - Return the specified argument. + Action::ExprTy *getArg(unsigned Arg) const { + assert(Arg < NumArgs && "Arg access out of range!"); + return Args[Arg]; + } +}; + +} // end namespace clang + +#endif diff --git a/include/clang/Parse/DeclSpec.h b/include/clang/Parse/DeclSpec.h new file mode 100644 index 0000000000..95be993831 --- /dev/null +++ b/include/clang/Parse/DeclSpec.h @@ -0,0 +1,552 @@ +//===--- SemaDeclSpec.h - Declaration Specifier Semantic Analys -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines interfaces used for Declaration Specifiers and Declarators. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_PARSE_SEMADECLSPEC_H +#define LLVM_CLANG_PARSE_SEMADECLSPEC_H + +#include "clang/Basic/Diagnostic.h" +#include "clang/Parse/Action.h" +#include "clang/Parse/AttributeList.h" +#include "llvm/ADT/SmallVector.h" + +namespace clang { + class LangOptions; + class IdentifierInfo; + +/// DeclSpec - This class captures information about "declaration specifiers", +/// which encompass storage-class-specifiers, type-specifiers, type-qualifiers, +/// and function-specifiers. +class DeclSpec { +public: + // storage-class-specifier + enum SCS { + SCS_unspecified, + SCS_typedef, + SCS_extern, + SCS_static, + SCS_auto, + SCS_register + }; + + // type-specifier + enum TSW { + TSW_unspecified, + TSW_short, + TSW_long, + TSW_longlong + }; + + enum TSC { + TSC_unspecified, + TSC_imaginary, + TSC_complex + }; + + enum TSS { + TSS_unspecified, + TSS_signed, + TSS_unsigned + }; + + enum TST { + TST_unspecified, + TST_void, + TST_char, + TST_int, + TST_float, + TST_double, + TST_bool, // _Bool + TST_decimal32, // _Decimal32 + TST_decimal64, // _Decimal64 + TST_decimal128, // _Decimal128 + TST_enum, + TST_union, + TST_struct, + TST_typedef + }; + + // type-qualifiers + enum TQ { // NOTE: These flags must be kept in sync with QualType::TQ. + TQ_unspecified = 0, + TQ_const = 1, + TQ_restrict = 2, + TQ_volatile = 4 + }; + + /// ParsedSpecifiers - Flags to query which specifiers were applied. This is + /// returned by getParsedSpecifiers. 
+ enum ParsedSpecifiers { + PQ_None = 0, + PQ_StorageClassSpecifier = 1, + PQ_TypeSpecifier = 2, + PQ_TypeQualifier = 4, + PQ_FunctionSpecifier = 8 + }; + +private: + + // storage-class-specifier + SCS StorageClassSpec : 3; + bool SCS_thread_specified : 1; + + // type-specifier + TSW TypeSpecWidth : 2; + TSC TypeSpecComplex : 2; + TSS TypeSpecSign : 2; + TST TypeSpecType : 4; + + // type-qualifiers + unsigned TypeQualifiers : 3; // Bitwise OR of TQ. 
+ + // function-specifier + bool FS_inline_specified : 1; + + /// TypeRep - This contains action-specific information about a specific TST. + /// For example, for a typedef or struct, it might contain the declaration for + /// these. + void *TypeRep; + + // attributes. + AttributeList *AttrList; + + // SourceLocation info. These are null if the item wasn't specified or if + // the setting was synthesized. + SourceLocation StorageClassSpecLoc, SCS_threadLoc; + SourceLocation TSWLoc, TSCLoc, TSSLoc, TSTLoc; + SourceLocation TQ_constLoc, TQ_restrictLoc, TQ_volatileLoc; + SourceLocation FS_inlineLoc; +public: + + DeclSpec() + : StorageClassSpec(SCS_unspecified), + SCS_thread_specified(false), + TypeSpecWidth(TSW_unspecified), + TypeSpecComplex(TSC_unspecified), + TypeSpecSign(TSS_unspecified), + TypeSpecType(TST_unspecified), + TypeQualifiers(TQ_unspecified), // mask of TQ bits, not a TSS value + FS_inline_specified(false), + TypeRep(0), + AttrList(0) { + } + ~DeclSpec() { + delete AttrList; + } + // storage-class-specifier + SCS getStorageClassSpec() const { return StorageClassSpec; } + bool isThreadSpecified() const { return SCS_thread_specified; } + + SourceLocation getStorageClassSpecLoc() const { return StorageClassSpecLoc; } + SourceLocation getThreadSpecLoc() const { return SCS_threadLoc; } + + + void ClearStorageClassSpecs() { + StorageClassSpec = DeclSpec::SCS_unspecified; + SCS_thread_specified = false; + StorageClassSpecLoc = SourceLocation(); + SCS_threadLoc = SourceLocation(); + } + + // type-specifier + TSW getTypeSpecWidth() const { return TypeSpecWidth; } + TSC getTypeSpecComplex() const { return TypeSpecComplex; } + TSS getTypeSpecSign() const { return TypeSpecSign; } + TST getTypeSpecType() const { return TypeSpecType; } + void *getTypeRep() const { return TypeRep; } + + SourceLocation getTypeSpecWidthLoc() const { return TSWLoc; } + SourceLocation getTypeSpecComplexLoc() const { return TSCLoc; } + SourceLocation getTypeSpecSignLoc() const { return TSSLoc; } + SourceLocation 
getTypeSpecTypeLoc() const { return TSTLoc; } + + /// getSpecifierName - Turn a type-specifier-type into a string like "_Bool" + /// or "union". + static const char *getSpecifierName(DeclSpec::TST T); + static const char *getSpecifierName(DeclSpec::SCS S); + + // type-qualifiers + + /// getTypeQualifiers - Return a set of TQs. + unsigned getTypeQualifiers() const { return TypeQualifiers; } + SourceLocation getConstSpecLoc() const { return TQ_constLoc; } + SourceLocation getRestrictSpecLoc() const { return TQ_restrictLoc; } + SourceLocation getVolatileSpecLoc() const { return TQ_volatileLoc; } + + + // function-specifier + bool isInlineSpecified() const { return FS_inline_specified; } + SourceLocation getInlineSpecLoc() const { return FS_inlineLoc; } + void ClearFunctionSpecs() { + FS_inline_specified = false; + FS_inlineLoc = SourceLocation(); + } + + /// hasTypeSpecifier - Return true if any type-specifier has been found. + bool hasTypeSpecifier() const { + return getTypeSpecType() != DeclSpec::TST_unspecified || + getTypeSpecWidth() != DeclSpec::TSW_unspecified || + getTypeSpecComplex() != DeclSpec::TSC_unspecified || + getTypeSpecSign() != DeclSpec::TSS_unspecified; + } + + + /// getParsedSpecifiers - Return a bitmask of which flavors of specifiers this + /// DeclSpec includes. + /// + unsigned getParsedSpecifiers() const; + + /// These methods set the specified attribute of the DeclSpec, but return true + /// and ignore the request if invalid (e.g. "extern" then "auto" is + /// specified). The name of the previous specifier is returned in prevspec. 
+ bool SetStorageClassSpec(SCS S, SourceLocation Loc, const char *&PrevSpec); + bool SetStorageClassSpecThread(SourceLocation Loc, const char *&PrevSpec); + bool SetTypeSpecWidth(TSW W, SourceLocation Loc, const char *&PrevSpec); + bool SetTypeSpecComplex(TSC C, SourceLocation Loc, const char *&PrevSpec); + bool SetTypeSpecSign(TSS S, SourceLocation Loc, const char *&PrevSpec); + bool SetTypeSpecType(TST T, SourceLocation Loc, const char *&PrevSpec, + void *TypeRep = 0); + + bool SetTypeQual(TQ T, SourceLocation Loc, const char *&PrevSpec, + const LangOptions &Lang); + + bool SetFunctionSpecInline(SourceLocation Loc, const char *&PrevSpec); + + /// AddAttributes - concatenates two attribute lists (alist comes first). + /// The GCC attribute syntax allows for the following: + /// + /// short __attribute__(( unused, deprecated )) + /// int __attribute__(( may_alias, aligned(16) )) var; + /// + /// This declares 4 attributes using 2 lists. The following syntax is + /// also allowed and equivalent to the previous declaration. + /// + /// short __attribute__((unused)) __attribute__((deprecated)) + /// int __attribute__((may_alias)) __attribute__((aligned(16))) var; + /// + void AddAttributes(AttributeList *alist) { + if (!alist) + return; // we parsed __attribute__(()) or had a syntax error + + if (AttrList) + alist->addAttributeList(AttrList); + AttrList = alist; + } + AttributeList *getAttributes() const { return AttrList; } + + /// Finish - This does final analysis of the declspec, issuing diagnostics for + /// things like "_Imaginary" (lacking an FP type). After calling this method, + /// DeclSpec is guaranteed self-consistent, even if an error occurred. 
+ void Finish(Diagnostic &D, const LangOptions &Lang); + +private: + void Diag(Diagnostic &D, SourceLocation Loc, unsigned DiagID) { + D.Report(Loc, DiagID); + } + void Diag(Diagnostic &D, SourceLocation Loc, unsigned DiagID, + const std::string &info) { + D.Report(Loc, DiagID, &info, 1); + } +}; + + +/// DeclaratorChunk - One instance of this struct is used for each type in a +/// declarator that is parsed. +/// +/// This is intended to be a small value object. +struct DeclaratorChunk { + enum { + Pointer, Reference, Array, Function + } Kind; + + /// Loc - The place where this type was defined. + SourceLocation Loc; + + struct PointerTypeInfo { + /// The type qualifiers: const/volatile/restrict. + unsigned TypeQuals : 3; + void destroy() {} + }; + + struct ReferenceTypeInfo { + /// The type qualifier: restrict. [GNU] C++ extension + bool HasRestrict; + void destroy() {} + }; + + struct ArrayTypeInfo { + /// The type qualifiers for the array: const/volatile/restrict. + unsigned TypeQuals : 3; + + /// True if this dimension included the 'static' keyword. + bool hasStatic : 1; + + /// True if this dimension was [*]. In this case, NumElts is null. + bool isStar : 1; + + /// This is the size of the array, or null if [] or [*] was specified. + /// Since the parser is multi-purpose, and we don't want to impose a root + /// expression class on all clients, NumElts is untyped. + Action::ExprTy *NumElts; + void destroy() {} + }; + + /// ParamInfo - An array of paraminfo objects is allocated whenever a function + /// declarator is parsed. There are two interesting styles of arguments here: + /// K&R-style identifier lists and parameter type lists. K&R-style identifier + /// lists will have information about the identifier, but no type information. + /// Parameter type lists will have type info (if the actions module provides + /// it), but may have null identifier info: e.g. for 'void foo(int X, int)'. 
+ struct ParamInfo { + IdentifierInfo *Ident; + SourceLocation IdentLoc; + Action::TypeTy *TypeInfo; + // FIXME: this also needs an attribute list. + ParamInfo() {} + ParamInfo(IdentifierInfo *ident, SourceLocation iloc, Action::TypeTy *typ) + : Ident(ident), IdentLoc(iloc), TypeInfo(typ) { + } + }; + + struct FunctionTypeInfo { + /// hasPrototype - This is true if the function had at least one typed + /// argument. If the function is () or (a,b,c), then it has no prototype, + /// and is treated as a K&R-style function. + bool hasPrototype : 1; + + /// isVariadic - If this function has a prototype, and if that proto ends + /// with ',...)', this is true. + bool isVariadic : 1; + + /// NumArgs - This is the number of formal arguments provided for the + /// declarator. + unsigned NumArgs; + + /// ArgInfo - This is a pointer to a new[]'d array of ParamInfo objects that + /// describe the arguments for this function declarator. This is null if + /// there are no arguments specified. + ParamInfo *ArgInfo; + + void destroy() { + delete[] ArgInfo; + } + }; + + union { + PointerTypeInfo Ptr; + ReferenceTypeInfo Ref; + ArrayTypeInfo Arr; + FunctionTypeInfo Fun; + }; + + + /// getPointer - Return a DeclaratorChunk for a pointer. + /// + static DeclaratorChunk getPointer(unsigned TypeQuals, SourceLocation Loc) { + DeclaratorChunk I; + I.Kind = Pointer; + I.Loc = Loc; + I.Ptr.TypeQuals = TypeQuals; + return I; + } + + /// getReference - Return a DeclaratorChunk for a reference. + /// + static DeclaratorChunk getReference(unsigned TypeQuals, SourceLocation Loc) { + DeclaratorChunk I; + I.Kind = Reference; + I.Loc = Loc; + I.Ref.HasRestrict = (TypeQuals & DeclSpec::TQ_restrict) != 0; + return I; + } + + /// getArray - Return a DeclaratorChunk for an array. 
+ /// + static DeclaratorChunk getArray(unsigned TypeQuals, bool isStatic, + bool isStar, void *NumElts, + SourceLocation Loc) { + DeclaratorChunk I; + I.Kind = Array; + I.Loc = Loc; + I.Arr.TypeQuals = TypeQuals; + I.Arr.hasStatic = isStatic; + I.Arr.isStar = isStar; + I.Arr.NumElts = NumElts; + return I; + } + + /// getFunction - Return a DeclaratorChunk for a function. + static DeclaratorChunk getFunction(bool hasProto, bool isVariadic, + ParamInfo *ArgInfo, unsigned NumArgs, + SourceLocation Loc) { + DeclaratorChunk I; + I.Kind = Function; + I.Loc = Loc; + I.Fun.hasPrototype = hasProto; + I.Fun.isVariadic = isVariadic; + I.Fun.NumArgs = NumArgs; + I.Fun.ArgInfo = 0; + + // new[] an argument array if needed. + if (NumArgs) { + I.Fun.ArgInfo = new DeclaratorChunk::ParamInfo[NumArgs]; + memcpy(I.Fun.ArgInfo, ArgInfo, sizeof(ArgInfo[0])*NumArgs); + } + return I; + } +}; + + +/// Declarator - Information about one declarator, including the parsed type +/// information and the identifier. When the declarator is fully formed, this +/// is turned into the appropriate Decl object. +/// +/// Declarators come in two types: normal declarators and abstract declarators. +/// Abstract declarators are used when parsing types, and don't have an +/// identifier. Normal declarators do have ID's. +/// +/// This is NOT intended to be a small value object: this should be a transient +/// object that lives on the stack. +class Declarator { + const DeclSpec &DS; + IdentifierInfo *Identifier; + SourceLocation IdentifierLoc; + +public: + enum TheContext { + FileContext, // File scope declaration. + PrototypeContext, // Within a function prototype. + KNRTypeListContext, // K&R type definition list for formals. + TypeNameContext, // Abstract declarator for types. + MemberContext, // Struct/Union field. + BlockContext, // Declaration within a block in a function. + ForContext // Declaration within first part of a for loop. 
+ }; +private: + /// Context - Where we are parsing this declarator. + /// + TheContext Context; + + /// DeclTypeInfo - This holds each type that the declarator includes as it is + /// parsed. This is pushed from the identifier out, which means that element + /// #0 will be the most closely bound to the identifier, and + /// DeclTypeInfo.back() will be the least closely bound. + llvm::SmallVector<DeclaratorChunk, 8> DeclTypeInfo; + + // AttrList - Attribute list attached to this declarator; deleted by clear(). + AttributeList *AttrList; +public: + Declarator(const DeclSpec &ds, TheContext C) + : DS(ds), Identifier(0), Context(C), AttrList(0) { + } + + /// The destructor releases everything through clear(). + ~Declarator() { + clear(); + } + + /// getDeclSpec - Return the declaration-specifier that this declarator was + /// declared with. + const DeclSpec &getDeclSpec() const { return DS; } + + TheContext getContext() const { return Context; } + + /// clear - Reset the contents of this Declarator: forget the identifier, + /// destroy every DeclaratorChunk, and delete the attribute list. The + /// attribute pointer is nulled afterwards so that clear() can be called more + /// than once (the destructor calls it again) without a double delete, and so + /// that AddAttributes' "no list yet" assertion holds on a reused object. + void clear() { + Identifier = 0; + IdentifierLoc = SourceLocation(); + + for (unsigned i = 0, e = DeclTypeInfo.size(); i != e; ++i) { + if (DeclTypeInfo[i].Kind == DeclaratorChunk::Function) + DeclTypeInfo[i].Fun.destroy(); + else if (DeclTypeInfo[i].Kind == DeclaratorChunk::Pointer) + DeclTypeInfo[i].Ptr.destroy(); + else if (DeclTypeInfo[i].Kind == DeclaratorChunk::Reference) + DeclTypeInfo[i].Ref.destroy(); + else if (DeclTypeInfo[i].Kind == DeclaratorChunk::Array) + DeclTypeInfo[i].Arr.destroy(); + else + assert(0 && "Unknown decl type!"); + } + DeclTypeInfo.clear(); + delete AttrList; + AttrList = 0; + } + + /// mayOmitIdentifier - Return true if the identifier is either optional or + /// not allowed. This is true for typenames and prototypes. + bool mayOmitIdentifier() const { + return Context == TypeNameContext || Context == PrototypeContext; + } + + /// mayHaveIdentifier - Return true if the identifier is either optional or + /// required. This is true for normal declarators and prototypes, but not + /// typenames.
+ bool mayHaveIdentifier() const { + return Context != TypeNameContext; + } + + /// isPastIdentifier - Return true if we have parsed beyond the point where + /// the identifier is recorded, i.e. IdentifierLoc holds a valid location. + bool isPastIdentifier() const { return IdentifierLoc.isValid(); } + + IdentifierInfo *getIdentifier() const { return Identifier; } + SourceLocation getIdentifierLoc() const { return IdentifierLoc; } + + /// SetIdentifier - Record the identifier and the location it was seen at. + void SetIdentifier(IdentifierInfo *ID, SourceLocation Loc) { + Identifier = ID; + IdentifierLoc = Loc; + } + + /// AddTypeInfo - Append a copy of TI to the end of DeclTypeInfo. + void AddTypeInfo(const DeclaratorChunk &TI) { + DeclTypeInfo.push_back(TI); + } + + /// getNumTypeObjects() - Return the number of types applied to this + /// declarator. + unsigned getNumTypeObjects() const { return DeclTypeInfo.size(); } + + /// Return the specified TypeInfo from this declarator. TypeInfo #0 is + /// closest to the identifier. The index is checked with an assert only. + const DeclaratorChunk &getTypeObject(unsigned i) const { + assert(i < DeclTypeInfo.size() && "Invalid type chunk"); + return DeclTypeInfo[i]; + } + DeclaratorChunk &getTypeObject(unsigned i) { + assert(i < DeclTypeInfo.size() && "Invalid type chunk"); + return DeclTypeInfo[i]; + } + + /// isFunctionDeclarator - Once this declarator is fully parsed and formed, + /// this method returns true if the identifier is a function declarator. + bool isFunctionDeclarator() const { + return !DeclTypeInfo.empty() && + DeclTypeInfo[0].Kind == DeclaratorChunk::Function; + } + + /// AddAttributes - simply adds the attribute list to the Declarator. + /// Unlike AddAttributes on DeclSpec, this routine should never have to + /// concatenate two lists.
The following syntax adds 3 attributes to "var": + /// + /// short int var __attribute__((aligned(16),common,deprecated)); + /// + void AddAttributes(AttributeList *alist) { + if (!alist) + return; // we parsed __attribute__(()) or had a syntax error + assert((AttrList == 0) && "Declarator already has an attribute list"); + AttrList = alist; + } + AttributeList *getAttributes() const { return AttrList; } +}; + + +} // end namespace clang + +#endif diff --git a/include/clang/Parse/Parser.h b/include/clang/Parse/Parser.h new file mode 100644 index 0000000000..19bee276a4 --- /dev/null +++ b/include/clang/Parse/Parser.h @@ -0,0 +1,371 @@ +//===--- Parser.h - C Language Parser ---------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Parser interface. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_PARSE_PARSER_H +#define LLVM_CLANG_PARSE_PARSER_H + +#include "clang/Lex/Preprocessor.h" +#include "clang/Parse/Action.h" + +namespace clang { + class DeclSpec; + class Declarator; + class AttributeList; + class Scope; + +/// Parser - This implements a parser for the C family of languages. After +/// parsing units of the grammar, productions are invoked to handle whatever has +/// been read. +/// +class Parser { + Preprocessor &PP; + + /// Tok - The current token we are peeking head. All parsing methods assume + /// that this is valid. + LexerToken Tok; + + unsigned short ParenCount, BracketCount, BraceCount; + + /// Actions - These are the callbacks we invoke as we parse various constructs + /// in the file. This refers to the common base class between MinimalActions + /// and SemaActions for those uses that don't matter. 
+ Action &Actions; + + Scope *CurScope; + Diagnostic &Diags; +public: + Parser(Preprocessor &PP, Action &Actions); + ~Parser(); + + const LangOptions &getLang() const { return PP.getLangOptions(); } + TargetInfo &getTargetInfo() const { return PP.getTargetInfo(); } + Action &getActions() const { return Actions; } + + // Type forwarding. All of these are statically 'void*', but they may all be + // different actual classes based on the actions in place. + typedef Action::ExprTy ExprTy; + typedef Action::StmtTy StmtTy; + typedef Action::DeclTy DeclTy; + typedef Action::TypeTy TypeTy; + + // Parsing methods. + + /// ParseTranslationUnit - All in one method that initializes parses, and + /// shuts down the parser. + void ParseTranslationUnit(); + + /// Initialize - Warm up the parser. + /// + void Initialize(); + + /// ParseTopLevelDecl - Parse one top-level declaration, return whatever the + /// action tells us to. This returns true if the EOF was encountered. + bool ParseTopLevelDecl(DeclTy*& Result); + + /// Finalize - Shut down the parser. + /// + void Finalize(); + +private: + //===--------------------------------------------------------------------===// + // Low-Level token peeking and consumption methods. + // + + /// isTokenParen - Return true if the cur token is '(' or ')'. + bool isTokenParen() const { + return Tok.getKind() == tok::l_paren || Tok.getKind() == tok::r_paren; + } + /// isTokenBracket - Return true if the cur token is '[' or ']'. + bool isTokenBracket() const { + return Tok.getKind() == tok::l_square || Tok.getKind() == tok::r_square; + } + /// isTokenBrace - Return true if the cur token is '{' or '}'. + bool isTokenBrace() const { + return Tok.getKind() == tok::l_brace || Tok.getKind() == tok::r_brace; + } + + /// isTokenStringLiteral - True if this token is a string-literal. 
+ /// + bool isTokenStringLiteral() const { + return Tok.getKind() == tok::string_literal || + Tok.getKind() == tok::wide_string_literal; + } + + /// ConsumeToken - Consume the current 'peek token' and lex the next one. + /// This does not work will all kinds of tokens: strings and specific other + /// tokens must be consumed with custom methods below. This returns the + /// location of the consumed token. + SourceLocation ConsumeToken() { + assert(!isTokenStringLiteral() && !isTokenParen() && !isTokenBracket() && + !isTokenBrace() && + "Should consume special tokens with Consume*Token"); + SourceLocation L = Tok.getLocation(); + PP.Lex(Tok); + return L; + } + + /// ConsumeAnyToken - Dispatch to the right Consume* method based on the + /// current token type. This should only be used in cases where the type of + /// the token really isn't known, e.g. in error recovery. + SourceLocation ConsumeAnyToken() { + if (isTokenParen()) + return ConsumeParen(); + else if (isTokenBracket()) + return ConsumeBracket(); + else if (isTokenBrace()) + return ConsumeBrace(); + else + return ConsumeToken(); + } + + /// ConsumeParen - This consume method keeps the paren count up-to-date. + /// + SourceLocation ConsumeParen() { + assert(isTokenParen() && "wrong consume method"); + if (Tok.getKind() == tok::l_paren) + ++ParenCount; + else if (ParenCount) + --ParenCount; // Don't let unbalanced )'s drive the count negative. + SourceLocation L = Tok.getLocation(); + PP.Lex(Tok); + return L; + } + + /// ConsumeBracket - This consume method keeps the bracket count up-to-date. + /// + SourceLocation ConsumeBracket() { + assert(isTokenBracket() && "wrong consume method"); + if (Tok.getKind() == tok::l_square) + ++BracketCount; + else if (BracketCount) + --BracketCount; // Don't let unbalanced ]'s drive the count negative. + + SourceLocation L = Tok.getLocation(); + PP.Lex(Tok); + return L; + } + + /// ConsumeBrace - This consume method keeps the brace count up-to-date. 
+ /// + SourceLocation ConsumeBrace() { + assert(isTokenBrace() && "wrong consume method"); + if (Tok.getKind() == tok::l_brace) + ++BraceCount; + else if (BraceCount) + --BraceCount; // Don't let unbalanced }'s drive the count negative. + + SourceLocation L = Tok.getLocation(); + PP.Lex(Tok); + return L; + } + + /// ConsumeStringToken - Consume the current 'peek token', lexing a new one + /// and returning the location of the consumed token. This method is + /// specific to strings, as it + /// handles string literal concatenation, as per C99 5.1.1.2, translation + /// phase #6. + SourceLocation ConsumeStringToken() { + assert(isTokenStringLiteral() && + "Should only consume string literals with this method"); + SourceLocation L = Tok.getLocation(); + PP.Lex(Tok); + return L; + } + + /// MatchRHSPunctuation - For punctuation with a LHS and RHS (e.g. '['/']'), + /// this helper function matches and consumes the specified RHS token if + /// present. If not present, it emits the specified diagnostic indicating + /// that the parser failed to match the RHS of the token at LHSLoc. LHSName + /// should be the name of the unmatched LHS token. This returns the location + /// of the consumed token. + /// NOTE(review): the declaration below takes only RHSTok and LHSLoc; the + /// 'specified diagnostic' and 'LHSName' wording looks stale - confirm. + SourceLocation MatchRHSPunctuation(tok::TokenKind RHSTok, + SourceLocation LHSLoc); + + /// ExpectAndConsume - The parser expects that 'ExpectedTok' is next in the + /// input. If so, it is consumed and false is returned. + /// + /// If the input is malformed, this emits the specified diagnostic. Next, if + /// SkipToTok is specified, it calls SkipUntil(SkipToTok). Finally, true is + /// returned. + bool ExpectAndConsume(tok::TokenKind ExpectedTok, unsigned Diag, + const char *DiagMsg = "", + tok::TokenKind SkipToTok = tok::unknown); + + //===--------------------------------------------------------------------===// + // Scope manipulation + + /// EnterScope - Start a new scope. + void EnterScope(unsigned ScopeFlags); + + /// ExitScope - Pop a scope off the scope stack.
+ void ExitScope(); + + //===--------------------------------------------------------------------===// + // Diagnostic Emission and Error recovery. + + void Diag(SourceLocation Loc, unsigned DiagID, + const std::string &Msg = std::string()); + void Diag(const LexerToken &Tok, unsigned DiagID, + const std::string &M = std::string()) { + Diag(Tok.getLocation(), DiagID, M); + } + + /// SkipUntil - Read tokens until we get to the specified token, then consume + /// it (unless DontConsume is true). Because we cannot guarantee that the + /// token will ever occur, this skips to the next token, or to some likely + /// good stopping point. If StopAtSemi is true, skipping will stop at a ';' + /// character. + /// + /// If SkipUntil finds the specified token, it returns true, otherwise it + /// returns false. + /// + /// The one- and two-token overloads forward to the array overload. + bool SkipUntil(tok::TokenKind T, bool StopAtSemi = true, + bool DontConsume = false) { + return SkipUntil(&T, 1, StopAtSemi, DontConsume); + } + bool SkipUntil(tok::TokenKind T1, tok::TokenKind T2, bool StopAtSemi = true, + bool DontConsume = false) { + tok::TokenKind TokArray[] = {T1, T2}; + return SkipUntil(TokArray, 2, StopAtSemi, DontConsume); + } + bool SkipUntil(const tok::TokenKind *Toks, unsigned NumToks, + bool StopAtSemi = true, bool DontConsume = false); + + //===--------------------------------------------------------------------===// + // C99 6.9: External Definitions.
+ DeclTy *ParseExternalDeclaration(); + DeclTy *ParseDeclarationOrFunctionDefinition(); + DeclTy *ParseFunctionDefinition(Declarator &D); + void ParseKNRParamDeclarations(Declarator &D); + void ParseSimpleAsm(); + void ParseAsmStringLiteral(); + + // Objective-C External Declarations + void ParseObjCAtDirectives(); + void ParseObjCAtClassDeclaration(SourceLocation atLoc); + void ParseObjCAtInterfaceDeclaration(); + void ParseObjCAtProtocolDeclaration(); + void ParseObjCAtImplementationDeclaration(); + void ParseObjCAtEndDeclaration(); + void ParseObjCAtAliasDeclaration(); + + void ParseObjCInstanceMethodDeclaration(); + void ParseObjCClassMethodDeclaration(); + + //===--------------------------------------------------------------------===// + // C99 6.5: Expressions. + + typedef Action::ExprResult ExprResult; + typedef Action::StmtResult StmtResult; + + ExprResult ParseExpression(); + ExprResult ParseConstantExpression(); + ExprResult ParseAssignmentExpression(); // Expr that doesn't include commas. + + ExprResult ParseExpressionWithLeadingIdentifier(const LexerToken &Tok); + ExprResult ParseAssignmentExprWithLeadingIdentifier(const LexerToken &Tok); + ExprResult ParseAssignmentExpressionWithLeadingStar(const LexerToken &Tok); + + ExprResult ParseRHSOfBinaryExpression(ExprResult LHS, unsigned MinPrec); + ExprResult ParseCastExpression(bool isUnaryExpression); + ExprResult ParsePostfixExpressionSuffix(ExprResult LHS); + ExprResult ParseSizeofAlignofExpression(); + ExprResult ParseBuiltinPrimaryExpression(); + + /// ParenParseOption - Control what ParseParenExpression will parse. + enum ParenParseOption { + SimpleExpr, // Only parse '(' expression ')' + CompoundStmt, // Also allow '(' compound-statement ')' + CompoundLiteral, // Also allow '(' type-name ')' '{' ... 
'}' + CastExpr // Also allow '(' type-name ')' <anything> + }; + ExprResult ParseParenExpression(ParenParseOption &ExprType, TypeTy *&CastTy, + SourceLocation &RParenLoc); + + ExprResult ParseSimpleParenExpression() { // Parse SimpleExpr only. + SourceLocation RParenLoc; + return ParseSimpleParenExpression(RParenLoc); + } + ExprResult ParseSimpleParenExpression(SourceLocation &RParenLoc) { + ParenParseOption Op = SimpleExpr; + TypeTy *CastTy; + return ParseParenExpression(Op, CastTy, RParenLoc); + } + ExprResult ParseStringLiteralExpression(); + + //===--------------------------------------------------------------------===// + // C++ 5.2p1: C++ Casts + ExprResult ParseCXXCasts(); + + //===--------------------------------------------------------------------===// + // C++ 2.13.5: C++ Boolean Literals + ExprResult ParseCXXBoolLiteral(); + + //===--------------------------------------------------------------------===// + // C99 6.7.8: Initialization. + ExprResult ParseInitializer(); + ExprResult ParseInitializerWithPotentialDesignator(); + + //===--------------------------------------------------------------------===// + // C99 6.8: Statements and Blocks. 
+ + StmtResult ParseStatement() { return ParseStatementOrDeclaration(true); } + StmtResult ParseStatementOrDeclaration(bool OnlyStatement = false); + StmtResult ParseIdentifierStatement(bool OnlyStatement); + StmtResult ParseCaseStatement(); + StmtResult ParseDefaultStatement(); + StmtResult ParseCompoundStatement(); + StmtResult ParseCompoundStatementBody(); + StmtResult ParseIfStatement(); + StmtResult ParseSwitchStatement(); + StmtResult ParseWhileStatement(); + StmtResult ParseDoStatement(); + StmtResult ParseForStatement(); + StmtResult ParseGotoStatement(); + StmtResult ParseContinueStatement(); + StmtResult ParseBreakStatement(); + StmtResult ParseReturnStatement(); + StmtResult ParseAsmStatement(); + void ParseAsmOperandsOpt(); + + //===--------------------------------------------------------------------===// + // C99 6.7: Declarations. + + DeclTy *ParseDeclaration(unsigned Context); + DeclTy *ParseInitDeclaratorListAfterFirstDeclarator(Declarator &D); + void ParseDeclarationSpecifiers(DeclSpec &DS); + void ParseSpecifierQualifierList(DeclSpec &DS); + + bool ParseTag(DeclTy *&Decl, unsigned TagType, SourceLocation StartLoc); + void ParseEnumSpecifier(DeclSpec &DS); + void ParseEnumBody(SourceLocation StartLoc, DeclTy *TagDecl); + void ParseStructUnionSpecifier(DeclSpec &DS); + void ParseStructUnionBody(SourceLocation StartLoc, unsigned TagType, + DeclTy *TagDecl); + + bool isDeclarationSpecifier() const; + bool isTypeSpecifierQualifier() const; + + TypeTy *ParseTypeName(); + AttributeList *ParseAttributes(); + + /// ParseDeclarator - Parse and verify a newly-initialized declarator. 
+ void ParseDeclarator(Declarator &D); + void ParseDeclaratorInternal(Declarator &D); + void ParseTypeQualifierListOpt(DeclSpec &DS); + void ParseDirectDeclarator(Declarator &D); + void ParseParenDeclarator(Declarator &D); + void ParseBracketDeclarator(Declarator &D); +}; + +} // end namespace clang + +#endif diff --git a/include/clang/Parse/Scope.h b/include/clang/Parse/Scope.h new file mode 100644 index 0000000000..367bbf2523 --- /dev/null +++ b/include/clang/Parse/Scope.h @@ -0,0 +1,146 @@ +//===--- Scope.h - Scope interface ------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Scope interface. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_PARSE_SCOPE_H +#define LLVM_CLANG_PARSE_SCOPE_H + +#include "clang/Parse/Action.h" +#include "llvm/ADT/SmallPtrSet.h" + +namespace clang { + +/// Scope - A scope is a transient data structure that is used while parsing the +/// program. It assists with resolving identifiers to the appropriate +/// declaration. +/// +class Scope { +public: + /// ScopeFlags - These are bitfields that are or'd together when creating a + /// scope, which defines the sorts of things the scope contains. + enum ScopeFlags { + /// FnScope - This indicates that the scope corresponds to a function, which + /// means that labels are set here. + FnScope = 0x01, + + /// BreakScope - This is a while,do,switch,for, etc that can have break + /// stmts embedded into it. + BreakScope = 0x02, + + /// ContinueScope - This is a while,do,for, which can have continue + /// stmt embedded into it. 
+ ContinueScope = 0x04, + + /// HasBreak - This flag is set on 'BreakScope' scopes, when they actually + /// do contain a break stmt. + HasBreak = 0x08, + + /// HasContinue - This flag is set on 'ContinueScope' scopes, when they + /// actually do contain a continue stmt. + HasContinue = 0x10 + }; +private: + /// The parent scope for this scope. This is null for the translation-unit + /// scope. + Scope *AnyParent; + + /// Depth - This is the depth of this scope. The translation-unit scope has + /// depth 0. + unsigned Depth : 16; + + /// Flags - This contains a set of ScopeFlags, which indicates how the scope + /// interrelates with other control flow statements. + unsigned Flags : 8; + + /// FnParent - If this scope has a parent scope that is a function body, this + /// pointer is non-null and points to it. This is used for label processing. + Scope *FnParent; + + /// BreakParent/ContinueParent - This is a direct link to the immediately + /// preceeding BreakParent/ContinueParent if this scope is not one, or null if + /// there is no containing break/continue scope. + Scope *BreakParent, *ContinueParent; + + /// DeclsInScope - This keeps track of all declarations in this scope. When + /// the declaration is added to the scope, it is set as the current + /// declaration for the identifier in the IdentifierTable. When the scope is + /// popped, these declarations are removed from the IdentifierTable's notion + /// of current declaration. It is up to the current Action implementation to + /// implement these semantics. + typedef llvm::SmallPtrSet<Action::DeclTy*, 32> DeclSetTy; + DeclSetTy DeclsInScope; +public: + Scope(Scope *Parent, unsigned ScopeFlags) { + Init(Parent, ScopeFlags); + } + + /// getParent - Return the scope that this is nested in. + /// + Scope *getParent() const { return AnyParent; } + + /// getContinueParent - Return the closest scope that a continue statement + /// would be affected by. 
+ Scope *getContinueParent() const { + return ContinueParent; + } + + /// getBreakParent - Return the closest scope that a break statement + /// would be affected by. + Scope *getBreakParent() const { + return BreakParent; + } + + + typedef DeclSetTy::iterator decl_iterator; + decl_iterator decl_begin() const { return DeclsInScope.begin(); } + decl_iterator decl_end() const { return DeclsInScope.end(); } + + void AddDecl(Action::DeclTy *D) { + DeclsInScope.insert(D); + } + + /// isDeclScope - Return true if this is the scope that the specified decl is + /// declared in. + bool isDeclScope(Action::DeclTy *D) { + return DeclsInScope.count(D) != 0; + } + + + /// Init - This is used by the parser to implement scope caching. + /// + void Init(Scope *Parent, unsigned ScopeFlags) { + assert((ScopeFlags & (HasBreak|HasContinue)) == 0 && + "These flags can't be set in ctor!"); + AnyParent = Parent; + Depth = AnyParent ? AnyParent->Depth+1 : 0; + Flags = ScopeFlags; + + if (AnyParent) { + FnParent = AnyParent->FnParent; + BreakParent = AnyParent->BreakParent; + ContinueParent = AnyParent->ContinueParent; + } else { + FnParent = BreakParent = ContinueParent = 0; + } + + // If this scope is a function or contains breaks/continues, remember it. + if (Flags & FnScope) FnParent = this; + if (Flags & BreakScope) BreakParent = this; + if (Flags & ContinueScope) ContinueParent = this; + + DeclsInScope.clear(); + } +}; + +} // end namespace clang + +#endif diff --git a/include/clang/Sema/ASTStreamer.h b/include/clang/Sema/ASTStreamer.h new file mode 100644 index 0000000000..f55f8710ce --- /dev/null +++ b/include/clang/Sema/ASTStreamer.h @@ -0,0 +1,45 @@ +//===--- ASTStreamer.h - Stream ASTs for top-level decls --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Chris Lattner and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines the ASTStreamer interface. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_ASTSTREAMER_H +#define LLVM_CLANG_AST_ASTSTREAMER_H + +namespace clang { + class Preprocessor; + class ASTContext; + class Decl; + + /// ASTStreamerTy - This is an opaque type used to reference ASTStreamer + /// objects. + typedef void ASTStreamerTy; + + /// ASTStreamer_Init - Create an ASTStreamer with the specified ASTContext + /// and FileID. + ASTStreamerTy *ASTStreamer_Init(Preprocessor &pp, ASTContext &ctxt, + unsigned MainFileID); + + /// ASTStreamer_ReadTopLevelDecl - Parse and return one top-level declaration. + /// This returns null at end of file. + Decl *ASTStreamer_ReadTopLevelDecl(ASTStreamerTy *Streamer); + + /// ASTStreamer_PrintStats - Emit statistic information to stderr. + /// + void ASTStreamer_PrintStats(ASTStreamerTy *Streamer); + + /// ASTStreamer_Terminate - Gracefully shut down the streamer. 
+ /// + void ASTStreamer_Terminate(ASTStreamerTy *Streamer); + +} // end namespace clang + +#endif diff --git a/test/Lexer/badstring_in_if0.c b/test/Lexer/badstring_in_if0.c new file mode 100644 index 0000000000..714f89b386 --- /dev/null +++ b/test/Lexer/badstring_in_if0.c @@ -0,0 +1,8 @@ +// RUN: clang -E %s 2>&1 | not grep error +#if 0 + + " + + ' + +#endif diff --git a/test/Lexer/block_cmt_end.c b/test/Lexer/block_cmt_end.c new file mode 100644 index 0000000000..b111b2a23e --- /dev/null +++ b/test/Lexer/block_cmt_end.c @@ -0,0 +1,27 @@ +/* + RUN: clang -E %s | grep bar && + RUN: clang -E %s | grep foo && + RUN: clang -E %s | not grep abc && + RUN: clang -E %s | not grep xyz && + RUN: clang -parse-ast-check %s + */ + +/* abc + +next comment ends with normal escaped newline: +*/ + +/* expected-warning {{escaped newline}} expected-warning {{backslash and newline}} *\ +/ + +bar + +/* xyz + +next comment ends with a trigraph escaped newline: */ + +/* expected-warning {{escaped newline between}} expected-warning {{backslash and newline separated by space}} expected-warning {{trigraph ends block comment}} *??/ +/ + +foo /* expected-error {{expected '=', ',', ';', 'asm', or '__attribute__' after declarator}} */ + diff --git a/test/Lexer/escape_newline.c b/test/Lexer/escape_newline.c new file mode 100644 index 0000000000..235ee51ef0 --- /dev/null +++ b/test/Lexer/escape_newline.c @@ -0,0 +1,7 @@ +// RUN: clang -E %s | grep -- ' ->' && +// RUN: clang -E %s 2>&1 | grep 'backslash and newline separated by space' && +// RUN: clang -E %s 2>&1 | grep 'trigraph converted' + +// This is an ugly way to spell a -> token. 
+ -??/ +> diff --git a/test/Lexer/number.c b/test/Lexer/number.c new file mode 100644 index 0000000000..4e12cc7edc --- /dev/null +++ b/test/Lexer/number.c @@ -0,0 +1,4 @@ +// RUN: clang %s -fsyntax-only + +float X = 1.17549435e-38F; + diff --git a/test/Lexer/unknown-char.c b/test/Lexer/unknown-char.c new file mode 100644 index 0000000000..8bdfe60553 --- /dev/null +++ b/test/Lexer/unknown-char.c @@ -0,0 +1,2 @@ +// RUN: clang -E %s 2>&1 | not grep error + ` ` ` ` diff --git a/test/Makefile b/test/Makefile new file mode 100644 index 0000000000..2c35b4421e --- /dev/null +++ b/test/Makefile @@ -0,0 +1,3 @@ + +all: + find Lexer Preprocessor Parser -name '*.c*' -print -exec ./TestRunner.sh {} \; diff --git a/test/Parser/CompoundStmtScope.c b/test/Parser/CompoundStmtScope.c new file mode 100644 index 0000000000..d6a4730632 --- /dev/null +++ b/test/Parser/CompoundStmtScope.c @@ -0,0 +1,8 @@ +// RUN: clang -parse-ast-check %s + +int foo() { + { + typedef float X; + } + X Y; // expected-error {{use of undeclared identifier}} +} diff --git a/test/Parser/argument_qualified.c b/test/Parser/argument_qualified.c new file mode 100644 index 0000000000..cd92c3258c --- /dev/null +++ b/test/Parser/argument_qualified.c @@ -0,0 +1,5 @@ +// RUN: clang %s +int abc (const float x) { + return 1; +} + diff --git a/test/Parser/argument_redef.c b/test/Parser/argument_redef.c new file mode 100644 index 0000000000..c3dae512a3 --- /dev/null +++ b/test/Parser/argument_redef.c @@ -0,0 +1,6 @@ +/* RUN: clang -parse-ast-check %s +*/ + +int foo(int A) { /* expected-error {{previous definition is here}} */ + int A; /* expected-error {{redefinition of 'A'}} */ +} diff --git a/test/Parser/argument_scope.c b/test/Parser/argument_scope.c new file mode 100644 index 0000000000..8b9f065c82 --- /dev/null +++ b/test/Parser/argument_scope.c @@ -0,0 +1,6 @@ +// RUN: clang -fsyntax-only %s +typedef struct foo foo; + +void blah(int foo) { + foo = 1; +} diff --git a/test/Parser/attributes.c 
b/test/Parser/attributes.c new file mode 100644 index 0000000000..29e8c81f3b --- /dev/null +++ b/test/Parser/attributes.c @@ -0,0 +1,6 @@ +// RUN: clang -parse-ast-check %s + +static __inline void __attribute__((__always_inline__, __nodebug__)) // expected-warning {{extension used}} +foo (void) +{ +} diff --git a/test/Parser/bad-control.c b/test/Parser/bad-control.c new file mode 100644 index 0000000000..914393461a --- /dev/null +++ b/test/Parser/bad-control.c @@ -0,0 +1,9 @@ +/* RUN: clang -parse-ast-check %s +*/ +int foo() { + break; /* expected-error {{'break' statement not in loop or switch statement}} */ +} + +int foo2() { + continue; /* expected-error {{'continue' statement not in loop statement}} */ +} diff --git a/test/Parser/c-namespace.c b/test/Parser/c-namespace.c new file mode 100644 index 0000000000..2b380503ac --- /dev/null +++ b/test/Parser/c-namespace.c @@ -0,0 +1,6 @@ +// RUN: clang -fsyntax-only %s +void bla1() { + struct XXX; + int XXX; +} + diff --git a/test/Parser/cxx-bool.cpp b/test/Parser/cxx-bool.cpp new file mode 100644 index 0000000000..623dcb2887 --- /dev/null +++ b/test/Parser/cxx-bool.cpp @@ -0,0 +1,4 @@ +// RUN: clang -fsyntax-only %s + +bool a = true; +bool b = false; diff --git a/test/Parser/cxx-casting.cpp b/test/Parser/cxx-casting.cpp new file mode 100644 index 0000000000..638985faf7 --- /dev/null +++ b/test/Parser/cxx-casting.cpp @@ -0,0 +1,32 @@ +// RUN: clang -fsyntax-only %s +// XFAIL: * + +char *const_cast_test(const char *var) +{ + return const_cast<char*>(var); +} + +#if 0 +// FIXME: Uncomment when C++ is supported more. 
+struct A { + virtual ~A() {} +}; + +struct B : public A { +}; + +struct B *dynamic_cast_test(struct A *a) +{ + return dynamic_cast<struct B*>(a); +} +#endif + +char *reinterpret_cast_test() +{ + return reinterpret_cast<char*>(0xdeadbeef); +} + +double static_cast_test(int i) +{ + return static_cast<double>(i); +} diff --git a/test/Parser/cxx-reference.cpp b/test/Parser/cxx-reference.cpp new file mode 100644 index 0000000000..44a2a03c9c --- /dev/null +++ b/test/Parser/cxx-reference.cpp @@ -0,0 +1,17 @@ +// RUN: clang -parse-ast-check %s + +extern char *bork; +char *& bar = bork; + +void foo(int &a) { +} + +typedef int & A; + +void g(const A aref) { +} + +int & const X; // expected-error {{'const' qualifier may not be applied to a reference}} +int & volatile Y; // expected-error {{'volatile' qualifier may not be applied to a reference}} +int & const volatile Z; /* expected-error {{'const' qualifier may not be applied}} \ + expected-error {{'volatile' qualifier may not be applied}} */ diff --git a/test/Parser/declarators.c b/test/Parser/declarators.c new file mode 100644 index 0000000000..af599f82fe --- /dev/null +++ b/test/Parser/declarators.c @@ -0,0 +1,28 @@ +// RUN: clang %s -fsyntax-only + +extern int a1[]; + +void f0(); +void f1(int [*]); +void f2(int [const *]); +void f3(int [volatile const*]); +int f4(*XX)(void); + +char ((((*X)))); + +void (*signal(int, void (*)(int)))(int); + +int a, ***C, * const D, b(int); + +int *A; + +struct str; + +int test2(int *P, int A) { + struct str; + + // Hard case for array decl, not Array[*]. + int Array[*(int*)P+A]; +} + + diff --git a/test/Parser/expressions.c b/test/Parser/expressions.c new file mode 100644 index 0000000000..77201a80f9 --- /dev/null +++ b/test/Parser/expressions.c @@ -0,0 +1,30 @@ +// RUN: clang -fsyntax-only %s + +void test1() { + if (sizeof (int){ 1}); // sizeof compound literal + if (sizeof (int)); // sizeof type + + (int)4; // cast. + (int){4}; // compound literal. 
+ + // FIXME: change this to the struct version when we can. + //int A = (struct{ int a;}){ 1}.a; + int A = (int){ 1}.a; +} + +int test2(int a, int b) { + return a ? a,b : a; +} + +int test3(int a, int b, int c) { + return a = b = c; +} + +int test4() { + test4(); +} + +int test_offsetof() { + // FIXME: change into something that is semantically correct. + __builtin_offsetof(int, a.b.c[4][5]); +} diff --git a/test/Parser/function-decls.c b/test/Parser/function-decls.c new file mode 100644 index 0000000000..ef93756cc6 --- /dev/null +++ b/test/Parser/function-decls.c @@ -0,0 +1,10 @@ +/* RUN: clang %s -parse-ast-print + */ + +void foo() { + int X; + X = sizeof(void (*(*)())()); + X = sizeof(int(*)(int, float, ...)); + X = sizeof(void (*(int arga, void (*argb)(double Y)))(void* Z)); +} + diff --git a/test/Parser/portability.c b/test/Parser/portability.c new file mode 100644 index 0000000000..a96aee5c65 --- /dev/null +++ b/test/Parser/portability.c @@ -0,0 +1,5 @@ +// RUN: clang -arch ppc -arch linux -fsyntax-only %s 2>&1 | grep note | wc -l | grep 1 + +// wchar_t varies across targets. 
+void *X = L"foo"; + diff --git a/test/Parser/recovery-1.c b/test/Parser/recovery-1.c new file mode 100644 index 0000000000..4dd1af17f4 --- /dev/null +++ b/test/Parser/recovery-1.c @@ -0,0 +1,7 @@ +// RUN: clang -fsyntax-only -fno-caret-diagnostics -pedantic %s 2>&1 | grep warning | wc -l | grep 1 +// RUN: clang -parse-ast-check %s + +char (((( /* expected-error {{to match this '('}} */ +*X x ] )))); /* expected-error {{expected ')'}} */ + +; // expected-warning {{ISO C does not allow an extra ';' outside of a function}} diff --git a/test/Parser/statements.c b/test/Parser/statements.c new file mode 100644 index 0000000000..b3f043eaaa --- /dev/null +++ b/test/Parser/statements.c @@ -0,0 +1,49 @@ +// RUN: clang -fsyntax-only %s + +int test1() { + { ; { ;;}} ;; +} + +int test2() { + if (0) { if (1) {} } else { } + + do { } while (0); + + while (0) while(0) do ; while(0); + + for (0;0;0) + for (;;) + for (9;0;2) + ; + for (int X = 0; 0; 0); +} + +int test3() { + switch (0) { + + case 4: + if (0) { + case 6: ; + } + default: + ; + } +} + +int test4() { + if (0); + + int X; // declaration in a block. + +foo: if (0); +} + +typedef int t; +void test5() { + if (0); + + //t x = 0; // FIXME: Enable when handling of typedef names is impl. + + if (0); +} + diff --git a/test/Parser/struct-recursion.c b/test/Parser/struct-recursion.c new file mode 100644 index 0000000000..c16f9fc185 --- /dev/null +++ b/test/Parser/struct-recursion.c @@ -0,0 +1,11 @@ +// RUN: clang %s -fsyntax-only + +// C99 6.7.2.3p11 + +// mutually recursive structs +struct s1 { struct s2 *A; }; +struct s2 { struct s1 *B; }; + +// both types are complete now. +struct s1 a; +struct s2 b; diff --git a/test/Parser/types.c b/test/Parser/types.c new file mode 100644 index 0000000000..f1ffb94b98 --- /dev/null +++ b/test/Parser/types.c @@ -0,0 +1,6 @@ +// RUN: clang %s -fsyntax-only + +// Test the X can be overloaded inside the struct. 
+typedef int X; +struct Y { short X; }; + diff --git a/test/Preprocessor/_Pragma-dependency.c b/test/Preprocessor/_Pragma-dependency.c new file mode 100644 index 0000000000..f7d7efe41b --- /dev/null +++ b/test/Preprocessor/_Pragma-dependency.c @@ -0,0 +1,7 @@ +// RUN: clang %s -E 2>&1 | grep 'DO_PRAGMA (STR' && +// RUN: clang %s -E 2>&1 | grep '7:12' + +#define DO_PRAGMA _Pragma +#define STR "GCC dependency \"parse.y\"") +// Test that this line is printed by caret diagnostics. +DO_PRAGMA (STR diff --git a/test/Preprocessor/_Pragma-location.c b/test/Preprocessor/_Pragma-location.c new file mode 100644 index 0000000000..152e71ac13 --- /dev/null +++ b/test/Preprocessor/_Pragma-location.c @@ -0,0 +1,4 @@ +// RUN: clang %s -E | not grep 'scratch space' + +#define push _Pragma ("pack(push)") +push diff --git a/test/Preprocessor/_Pragma-physloc.c b/test/Preprocessor/_Pragma-physloc.c new file mode 100644 index 0000000000..b8f5499c7a --- /dev/null +++ b/test/Preprocessor/_Pragma-physloc.c @@ -0,0 +1,6 @@ +// RUN: clang %s -E | grep '#pragma x y z' && +// RUN: clang %s -E | grep '#pragma a b c' + +_Pragma("x y z") +_Pragma("a b c") + diff --git a/test/Preprocessor/_Pragma-poison.c b/test/Preprocessor/_Pragma-poison.c new file mode 100644 index 0000000000..82a7fbecc2 --- /dev/null +++ b/test/Preprocessor/_Pragma-poison.c @@ -0,0 +1,8 @@ +// RUN: clang -Eonly %s 2>&1 | grep error | wc -l | grep 1 && +// RUN: clang -Eonly %s 2>&1 | grep 7:4 | wc -l | grep 1 + +#define BAR _Pragma ("GCC poison XYZW") XYZW /*NO ERROR*/ +XYZW // NO ERROR +BAR + XYZW // ERROR + diff --git a/test/Preprocessor/_Pragma-syshdr.c b/test/Preprocessor/_Pragma-syshdr.c new file mode 100644 index 0000000000..4d2d29e023 --- /dev/null +++ b/test/Preprocessor/_Pragma-syshdr.c @@ -0,0 +1,4 @@ +// RUN: clang %s -E 2>&1 | grep 'system_header ignored in main file' + +_Pragma ("GCC system_header") + diff --git a/test/Preprocessor/_Pragma-syshdr2.c b/test/Preprocessor/_Pragma-syshdr2.c new file mode 100644 index 
0000000000..190e5a7a30 --- /dev/null +++ b/test/Preprocessor/_Pragma-syshdr2.c @@ -0,0 +1,5 @@ +// RUN: clang -E %s 2>&1 | grep 'file not found' + +#define DO_PRAGMA _Pragma +DO_PRAGMA ("GCC dependency \"blahblabh\"") + diff --git a/test/Preprocessor/builtin_line.c b/test/Preprocessor/builtin_line.c new file mode 100644 index 0000000000..c9ce558c1a --- /dev/null +++ b/test/Preprocessor/builtin_line.c @@ -0,0 +1,4 @@ +// RUN: clang %s -E | grep "^ 4" +#define FOO __LINE__ + + FOO diff --git a/test/Preprocessor/c99-6_10_3_3_p4.c b/test/Preprocessor/c99-6_10_3_3_p4.c new file mode 100644 index 0000000000..13d5661e36 --- /dev/null +++ b/test/Preprocessor/c99-6_10_3_3_p4.c @@ -0,0 +1,6 @@ +// RUN: clang -E %s | grep -F 'char p[] = "x ## y";' +#define hash_hash # ## # +#define mkstr(a) # a +#define in_between(a) mkstr(a) +#define join(c, d) in_between(c hash_hash d) +char p[] = join(x, y); diff --git a/test/Preprocessor/c99-6_10_3_4_p5.c b/test/Preprocessor/c99-6_10_3_4_p5.c new file mode 100644 index 0000000000..fa5f7358b9 --- /dev/null +++ b/test/Preprocessor/c99-6_10_3_4_p5.c @@ -0,0 +1,29 @@ +// Example from C99 6.10.3.4p5 + +// RUN: clang -E %s | grep -F 'f(2 * (y+1)) + f(2 * (f(2 * (z[0])))) % f(2 * (0)) + t(1);' && +// RUN: clang -E %s | grep -F 'f(2 * (2 +(3,4)-0,1)) | f(2 * (~ 5)) & f(2 * (0,1))^m(0,1);' && +// RUN: clang -E %s | grep -F 'int i[] = { 1, 23, 4, 5, };' && +// RUN: clang -E %s | grep -F 'char c[2][6] = { "hello", "" };' + + +#define x 3 +#define f(a) f(x * (a)) +#undef x +#define x 2 +#define g f +#define z z[0] +#define h g(~ +#define m(a) a(w) +#define w 0,1 +#define t(a) a +#define p() int +#define q(x) x +#define r(x,y) x ## y +#define str(x) # x + f(y+1) + f(f(z)) % t(t(g)(0) + t)(1); + g(x+(3,4)-w) | h 5) & m +(f)^m(m); +p() i[q()] = { q(1), r(2,3), r(4,), r(,5), r(,) }; +char c[2][6] = { str(hello), str() }; + + diff --git a/test/Preprocessor/c99-6_10_3_4_p6.c b/test/Preprocessor/c99-6_10_3_4_p6.c new file mode 100644 index 
0000000000..ce7990a4cf --- /dev/null +++ b/test/Preprocessor/c99-6_10_3_4_p6.c @@ -0,0 +1,24 @@ +// Example from C99 6.10.3.4p6 + +// RUN: clang -E %s | grep -F 'printf("x" "1" "= %d, x" "2" "= s" x1, x2);' && +// RUN: clang -E %s | grep 'fputs("strncmp(\\"abc\\\\0d\\" \\"abc\\", .\\\\4.) == 0" ": @\\n", s);' && +// RUN: clang -E %s | grep -F 'include "vers2.h"' && +// RUN: clang -E %s | grep -F '"hello";' && +// RUN: clang -E %s | grep -F '"hello" ", world"' + +#define str(s) # s +#define xstr(s) str(s) +#define debug(s, t) printf("x" # s "= %d, x" # t "= s" \ + x ## s, x ## t) +#define INCFILE(n) vers ## n +#define glue(a, b) a ## b +#define xglue(a, b) glue(a, b) +#define HIGHLOW "hello" +#define LOW LOW ", world" +debug(1, 2); +fputs(str(strncmp("abc\0d" "abc", '\4') // this goes away + == 0) str(: @\n), s); +include xstr(INCFILE(2).h) +glue(HIGH, LOW); +xglue(HIGH, LOW) + diff --git a/test/Preprocessor/c99-6_10_3_4_p7.c b/test/Preprocessor/c99-6_10_3_4_p7.c new file mode 100644 index 0000000000..88957dfb67 --- /dev/null +++ b/test/Preprocessor/c99-6_10_3_4_p7.c @@ -0,0 +1,9 @@ +// Example from C99 6.10.3.4p7 + +// RUN: clang -E %s | grep -F 'int j[] = { 123, 45, 67, 89,' && +// RUN: clang -E %s | grep -F '10, 11, 12, };' + +#define t(x,y,z) x ## y ## z +int j[] = { t(1,2,3), t(,4,5), t(6,,7), t(8,9,), +t(10,,), t(,11,), t(,,12), t(,,) }; + diff --git a/test/Preprocessor/c99-6_10_3_4_p9.c b/test/Preprocessor/c99-6_10_3_4_p9.c new file mode 100644 index 0000000000..08b4637e0e --- /dev/null +++ b/test/Preprocessor/c99-6_10_3_4_p9.c @@ -0,0 +1,16 @@ +// Example from C99 6.10.3.4p9 + +// RUN: clang -E %s | grep -F 'fprintf(stderr, "Flag");' && +// RUN: clang -E %s | grep -F 'fprintf(stderr, "X = %d\n", x);' && +// RUN: clang -E %s | grep -F 'puts("The first, second, and third items.");' && +// RUN: clang -E %s | grep -F '((x>y)?puts("x>y"): printf("x is %d but y is %d", x, y));' + +#define debug(...) fprintf(stderr, __VA_ARGS__) +#define showlist(...) 
puts(#__VA_ARGS__) +#define report(test, ...) ((test)?puts(#test):\ + printf(__VA_ARGS__)) +debug("Flag"); +debug("X = %d\n", x); +showlist(The first, second, and third items.); +report(x>y, "x is %d but y is %d", x, y); + diff --git a/test/Preprocessor/comment_save.c b/test/Preprocessor/comment_save.c new file mode 100644 index 0000000000..1a3bd96efb --- /dev/null +++ b/test/Preprocessor/comment_save.c @@ -0,0 +1,7 @@ +// RUN: clang -E -C %s | grep '^// foo$' && +// RUN: clang -E -C %s | grep -F '^/* bar */$' + +// foo +/* bar */ + + diff --git a/test/Preprocessor/comment_save_if.c b/test/Preprocessor/comment_save_if.c new file mode 100644 index 0000000000..ce7b4c45b1 --- /dev/null +++ b/test/Preprocessor/comment_save_if.c @@ -0,0 +1,6 @@ +// RUN: clang %s -E -CC -pedantic 2>&1 | grep -v '^/' | not grep warning + +#if 1 /*bar */ + +#endif /*foo*/ + diff --git a/test/Preprocessor/comment_save_macro.c b/test/Preprocessor/comment_save_macro.c new file mode 100644 index 0000000000..635a6fd406 --- /dev/null +++ b/test/Preprocessor/comment_save_macro.c @@ -0,0 +1,8 @@ +// RUN: clang -E -C %s | grep '^boo bork bar // zot$' && +// RUN: clang -E -CC %s | grep -F '^boo bork /* blah*/ bar // zot$' && +// RUN: clang -E %s | grep '^boo bork bar$' + + +#define FOO bork // blah +boo FOO bar // zot + diff --git a/test/Preprocessor/cxx_and.cpp b/test/Preprocessor/cxx_and.cpp new file mode 100644 index 0000000000..b6bd00e529 --- /dev/null +++ b/test/Preprocessor/cxx_and.cpp @@ -0,0 +1,17 @@ +// RUN: clang -DA -DB -E %s | grep 'int a = 37 == 37' && +// RUN: clang -DA -E %s | grep 'int a = 927 == 927' && +// RUN: clang -DB -E %s | grep 'int a = 927 == 927' && +// RUN: clang -E %s | grep 'int a = 927 == 927' +#if defined(A) and defined(B) +#define X 37 +#else +#define X 927 +#endif + +#if defined(A) && defined(B) +#define Y 37 +#else +#define Y 927 +#endif + +int a = X == Y; diff --git a/test/Preprocessor/cxx_bitand.cpp b/test/Preprocessor/cxx_bitand.cpp new file mode 100644 index 
0000000000..ecc52e89ab --- /dev/null +++ b/test/Preprocessor/cxx_bitand.cpp @@ -0,0 +1,16 @@ +// RUN: clang -DA=1 -DB=2 -E %s | grep 'int a = 927 == 927' && +// RUN: clang -DA=1 -DB=1 -E %s | grep 'int a = 37 == 37' && +// RUN: clang -E %s | grep 'int a = 927 == 927' +#if A bitand B +#define X 37 +#else +#define X 927 +#endif + +#if A & B +#define Y 37 +#else +#define Y 927 +#endif + +int a = X == Y; diff --git a/test/Preprocessor/cxx_bitor.cpp b/test/Preprocessor/cxx_bitor.cpp new file mode 100644 index 0000000000..36c44523df --- /dev/null +++ b/test/Preprocessor/cxx_bitor.cpp @@ -0,0 +1,18 @@ +// RUN: clang -DA=1 -DB=1 -E %s | grep 'int a = 37 == 37' && +// RUN: clang -DA=0 -DB=1 -E %s | grep 'int a = 37 == 37' && +// RUN: clang -DA=1 -DB=0 -E %s | grep 'int a = 37 == 37' && +// RUN: clang -DA=0 -DB=0 -E %s | grep 'int a = 927 == 927' && +// RUN: clang -E %s | grep 'int a = 927 == 927' +#if A bitor B +#define X 37 +#else +#define X 927 +#endif + +#if A | B +#define Y 37 +#else +#define Y 927 +#endif + +int a = X == Y; diff --git a/test/Preprocessor/cxx_compl.cpp b/test/Preprocessor/cxx_compl.cpp new file mode 100644 index 0000000000..12e589ffa1 --- /dev/null +++ b/test/Preprocessor/cxx_compl.cpp @@ -0,0 +1,16 @@ +// RUN: clang -DA=1 -E %s | grep 'int a = 37 == 37' && +// RUN: clang -DA=0 -E %s | grep 'int a = 927 == 927' && +// RUN: clang -E %s | grep 'int a = 927 == 927' +#if compl 0 bitand A +#define X 37 +#else +#define X 927 +#endif + +#if ~0 & A +#define Y 37 +#else +#define Y 927 +#endif + +int a = X == Y; diff --git a/test/Preprocessor/cxx_not.cpp b/test/Preprocessor/cxx_not.cpp new file mode 100644 index 0000000000..2587b0ab99 --- /dev/null +++ b/test/Preprocessor/cxx_not.cpp @@ -0,0 +1,15 @@ +// RUN: clang -DA=1 -E %s | grep 'int a = 927 == 927' && +// RUN: clang -E %s | grep 'int a = 37 == 37' +#if not defined(A) +#define X 37 +#else +#define X 927 +#endif + +#if ! 
defined(A) +#define Y 37 +#else +#define Y 927 +#endif + +int a = X == Y; diff --git a/test/Preprocessor/cxx_not_eq.cpp b/test/Preprocessor/cxx_not_eq.cpp new file mode 100644 index 0000000000..b0be7b3917 --- /dev/null +++ b/test/Preprocessor/cxx_not_eq.cpp @@ -0,0 +1,16 @@ +// RUN: clang -DA=1 -DB=1 -E %s | grep 'int a = 927 == 927' && +// RUN: clang -E %s | grep 'int a = 927 == 927' && +// RUN: clang -DA=1 -DB=2 -E %s | grep 'int a = 37 == 37' +#if A not_eq B +#define X 37 +#else +#define X 927 +#endif + +#if A != B +#define Y 37 +#else +#define Y 927 +#endif + +int a = X == Y; diff --git a/test/Preprocessor/cxx_oper_keyword.cpp b/test/Preprocessor/cxx_oper_keyword.cpp new file mode 100644 index 0000000000..66586e7b36 --- /dev/null +++ b/test/Preprocessor/cxx_oper_keyword.cpp @@ -0,0 +1,7 @@ +// RUN: not clang %s -E && +// RUN: clang %s -E -fno-operator-names + +// Not valid in C++ unless -fno-operator-names is passed. +#define and foo + + diff --git a/test/Preprocessor/cxx_oper_spelling.cpp b/test/Preprocessor/cxx_oper_spelling.cpp new file mode 100644 index 0000000000..fc8bc70e51 --- /dev/null +++ b/test/Preprocessor/cxx_oper_spelling.cpp @@ -0,0 +1,11 @@ +// RUN: clang -E %s | grep 'a: "and"' + +#define X(A) #A + +// C++'03 2.5p2: "In all respects of the language, each alternative +// token behaves the same, respectively, as its primary token, +// except for its spelling" +// +// This should be spelled as 'and', not '&&' +a: X(and) + diff --git a/test/Preprocessor/cxx_or.cpp b/test/Preprocessor/cxx_or.cpp new file mode 100644 index 0000000000..ce3fed1cd4 --- /dev/null +++ b/test/Preprocessor/cxx_or.cpp @@ -0,0 +1,17 @@ +// RUN: clang -DA -DB -E %s | grep 'int a = 37 == 37' && +// RUN: clang -DA -E %s | grep 'int a = 37 == 37' && +// RUN: clang -DB -E %s | grep 'int a = 37 == 37' && +// RUN: clang -E %s | grep 'int a = 927 == 927' +#if defined(A) or defined(B) +#define X 37 +#else +#define X 927 +#endif + +#if defined(A) || defined(B) +#define Y 37 +#else 
+#define Y 927 +#endif + +int a = X == Y; diff --git a/test/Preprocessor/cxx_true.cpp b/test/Preprocessor/cxx_true.cpp new file mode 100644 index 0000000000..5ebdaf8d2f --- /dev/null +++ b/test/Preprocessor/cxx_true.cpp @@ -0,0 +1,13 @@ +/* RUN: clang -E %s -x=c++ | grep block_1 && + RUN: clang -E %s -x=c++ | not grep block_2 && + RUN: clang -E %s -x=c | not grep block +*/ + +#if true +block_1 +#endif + +#if false +block_2 +#endif + diff --git a/test/Preprocessor/cxx_xor.cpp b/test/Preprocessor/cxx_xor.cpp new file mode 100644 index 0000000000..7a4c8822cd --- /dev/null +++ b/test/Preprocessor/cxx_xor.cpp @@ -0,0 +1,18 @@ +// RUN: clang -DA=1 -DB=1 -E %s | grep 'int a = 927 == 927' && +// RUN: clang -DA=0 -DB=1 -E %s | grep 'int a = 37 == 37' && +// RUN: clang -DA=1 -DB=0 -E %s | grep 'int a = 37 == 37' && +// RUN: clang -DA=0 -DB=0 -E %s | grep 'int a = 927 == 927' && +// RUN: clang -E %s | grep 'int a = 927 == 927' +#if A xor B +#define X 37 +#else +#define X 927 +#endif + +#if A ^ B +#define Y 37 +#else +#define Y 927 +#endif + +int a = X == Y; diff --git a/test/Preprocessor/define_other_target.c b/test/Preprocessor/define_other_target.c new file mode 100644 index 0000000000..367e1642f0 --- /dev/null +++ b/test/Preprocessor/define_other_target.c @@ -0,0 +1,27 @@ +// Note that the run lines are at the bottom of this file. + +#define_other_target TEST1 +TEST1 // diagnose + +#define_other_target TEST2 +#undef TEST2 +TEST2 // no diagnose + +#define_other_target TEST3 +#define TEST3 +TEST3 // no diagnose + +#define TEST4 +#define_other_target TEST4 +TEST4 // diagnose + + +// check success: +// RUN: clang -Eonly %s && + +// Check proper # of notes is emitted. +// RUN: clang -Eonly %s 2>&1 | grep note | wc -l | grep 2 && + +// Check that the diagnostics are the right ones. 
+// RUN: clang %s -Eonly -fno-caret-diagnostics 2>&1 | grep ':4:1: note' && +// RUN: clang %s -Eonly -fno-caret-diagnostics 2>&1 | grep ':16:1: note' diff --git a/test/Preprocessor/define_target.c b/test/Preprocessor/define_target.c new file mode 100644 index 0000000000..85a132f557 --- /dev/null +++ b/test/Preprocessor/define_target.c @@ -0,0 +1,27 @@ +// Note that the run lines are at the bottom of this file. + +#define_target TEST1 +TEST1 // diagnose + +#define_target TEST2 +#undef TEST2 +TEST2 // no diagnose + +#define_target TEST3 +#define TEST3 +TEST3 // no diagnose + +#define TEST4 +#define_target TEST4 +TEST4 // diagnose + + +// check success: +// RUN: clang -Eonly %s && + +// Check proper # of notes is emitted. +// RUN: clang -Eonly %s 2>&1 | grep note | wc -l | grep 2 && + +// Check that the diagnostics are the right ones. +// RUN: clang %s -Eonly -fno-caret-diagnostics 2>&1 | grep ':4:1: note' && +// RUN: clang %s -Eonly -fno-caret-diagnostics 2>&1 | grep ':16:1: note' diff --git a/test/Preprocessor/disabled-cond-diags.c b/test/Preprocessor/disabled-cond-diags.c new file mode 100644 index 0000000000..df9dc8919b --- /dev/null +++ b/test/Preprocessor/disabled-cond-diags.c @@ -0,0 +1,10 @@ +// RUN: clang -E %s 2>&1 | not grep "warning\|error" + +#if 0 + +// Shouldn't get warnings here. +??( ??) + +// Should not get an error here. +` ` ` ` +#endif diff --git a/test/Preprocessor/expr_liveness.c b/test/Preprocessor/expr_liveness.c new file mode 100644 index 0000000000..f14ac0a913 --- /dev/null +++ b/test/Preprocessor/expr_liveness.c @@ -0,0 +1,32 @@ +/* RUN: clang -E %s -DNO_ERRORS && + RUN: not clang -E %s + */ + +#ifdef NO_ERRORS +/* None of these divisions by zero are in live parts of the expression, do not + emit any diagnostics. */ + +#define MACRO_0 0 +#define MACRO_1 1 + +#if MACRO_0 && 10 / MACRO_0 +foo +#endif + +#if MACRO_1 || 10 / MACRO_0 +bar +#endif + +#if 0 ? 124/0 : 42 +#endif + +#else + + +/* The 1/0 is live, it should error out. 
*/ +#if 0 && 1 ? 4 : 1 / 0 +baz +#endif + + +#endif diff --git a/test/Preprocessor/expr_usual_conversions.c b/test/Preprocessor/expr_usual_conversions.c new file mode 100644 index 0000000000..b2ccc40900 --- /dev/null +++ b/test/Preprocessor/expr_usual_conversions.c @@ -0,0 +1,8 @@ +// RUN: clang %s -E 2>&1 | grep warning | wc -l | grep 2 + +#define INTMAX_MIN (-9223372036854775807LL -1) + +#if (-42 + 0U) / -2 +foo +#endif + diff --git a/test/Preprocessor/file_to_include.h b/test/Preprocessor/file_to_include.h new file mode 100644 index 0000000000..97728ab083 --- /dev/null +++ b/test/Preprocessor/file_to_include.h @@ -0,0 +1,3 @@ + +#warning file successfully included + diff --git a/test/Preprocessor/hash_line.c b/test/Preprocessor/hash_line.c new file mode 100644 index 0000000000..788440ba82 --- /dev/null +++ b/test/Preprocessor/hash_line.c @@ -0,0 +1,8 @@ +// The 1 and # should not go on the same line. +// RUN: clang %s -E | not grep "1 #" && +// RUN: clang %s -E | grep '^1$' && +// RUN: clang %s -E | grep '^ #$' +1 +#define EMPTY +EMPTY # + diff --git a/test/Preprocessor/hash_space.c b/test/Preprocessor/hash_space.c new file mode 100644 index 0000000000..77f5cfc530 --- /dev/null +++ b/test/Preprocessor/hash_space.c @@ -0,0 +1,6 @@ +// RUN: clang %s -E | grep " #" + +// Should put a space before the # so that -fpreprocessed mode doesn't +// macro expand this again. +#define HASH # +HASH define foo bar diff --git a/test/Preprocessor/includeexpand.c b/test/Preprocessor/includeexpand.c new file mode 100644 index 0000000000..33637952b7 --- /dev/null +++ b/test/Preprocessor/includeexpand.c @@ -0,0 +1,12 @@ +// RUN: clang %s -fno-caret-diagnostics 2>&1 | grep 'file successfully included' | wc -l | grep 3 + +// XX expands to nothing. 
+#define XX + +#define FILE "file_to_include.h" +#include XX FILE + +#include FILE + + +#include "file_to_include.h" diff --git a/test/Preprocessor/indent_macro.c b/test/Preprocessor/indent_macro.c new file mode 100644 index 0000000000..0dcaa7b4b9 --- /dev/null +++ b/test/Preprocessor/indent_macro.c @@ -0,0 +1,6 @@ +// RUN: clang -E %s | grep '^ zzap$' + +// zzap is on a new line, should be indented. +#define BLAH zzap + BLAH + diff --git a/test/Preprocessor/macro_arg_keyword.c b/test/Preprocessor/macro_arg_keyword.c new file mode 100644 index 0000000000..1f9d7e790f --- /dev/null +++ b/test/Preprocessor/macro_arg_keyword.c @@ -0,0 +1,6 @@ +// RUN: clang -E %s | grep xxx-xxx + +#define foo(return) return-return + +foo(xxx) + diff --git a/test/Preprocessor/macro_defined.c b/test/Preprocessor/macro_defined.c new file mode 100644 index 0000000000..f120197716 --- /dev/null +++ b/test/Preprocessor/macro_defined.c @@ -0,0 +1,6 @@ +// RUN: clang %s -E 2>&1 | not grep error + +// This should not be rejected. 
+#ifdef defined +#endif + diff --git a/test/Preprocessor/macro_disable.c b/test/Preprocessor/macro_disable.c new file mode 100644 index 0000000000..33b856d8f6 --- /dev/null +++ b/test/Preprocessor/macro_disable.c @@ -0,0 +1,13 @@ +// RUN: clang -E %s | grep 'a: 2 + M_0(3)(4)(5);' && +// RUN: clang -E %s | grep 'b: 4 + 4 + 3 + 2 + 1 + M_0(3)(2)(1);' + +#define M_0(x) M_ ## x +#define M_1(x) x + M_0(0) +#define M_2(x) x + M_1(1) +#define M_3(x) x + M_2(2) +#define M_4(x) x + M_3(3) +#define M_5(x) x + M_4(4) + +a: M_0(1)(2)(3)(4)(5); +b: M_0(5)(4)(3)(2)(1); + diff --git a/test/Preprocessor/macro_disable2.c b/test/Preprocessor/macro_disable2.c new file mode 100644 index 0000000000..6e1f80469c --- /dev/null +++ b/test/Preprocessor/macro_disable2.c @@ -0,0 +1,8 @@ +// RUN: clang -E %s | grep 'A B C A B A C A B C A' + +#define A A B C +#define B B C A +#define C C A B + +A + diff --git a/test/Preprocessor/macro_disable3.c b/test/Preprocessor/macro_disable3.c new file mode 100644 index 0000000000..b358a55677 --- /dev/null +++ b/test/Preprocessor/macro_disable3.c @@ -0,0 +1,8 @@ +// RUN: clang %s -E | grep -F 'f(2 * (f(2 * (z[0]))));' +// Check for C99 6.10.3.4p2. + +#define f(a) f(x * (a)) +#define x 2 +#define z z[0] +f(f(z)); + diff --git a/test/Preprocessor/macro_expand.c b/test/Preprocessor/macro_expand.c new file mode 100644 index 0000000000..69a4835c5a --- /dev/null +++ b/test/Preprocessor/macro_expand.c @@ -0,0 +1,7 @@ +// RUN: clang -E %s | grep '^Y$' + +#define X() Y +#define Y() X + +X()()() + diff --git a/test/Preprocessor/macro_expandloc.c b/test/Preprocessor/macro_expandloc.c new file mode 100644 index 0000000000..00bba6f60d --- /dev/null +++ b/test/Preprocessor/macro_expandloc.c @@ -0,0 +1,6 @@ +// RUN: clang %s -E 2>&1 | grep '#include' +#define FOO 1 + +// The error message should be on the #include line, not the 1. 
+#include FOO + diff --git a/test/Preprocessor/macro_expandloc2.c b/test/Preprocessor/macro_expandloc2.c new file mode 100644 index 0000000000..3a833299fd --- /dev/null +++ b/test/Preprocessor/macro_expandloc2.c @@ -0,0 +1,6 @@ +// RUN: clang %s -E 2>&1 | grep '#include' +#define FOO BAR + +// The error message should be on the #include line, not the 1. +#include FOO + diff --git a/test/Preprocessor/macro_fn_comma_swallow.c b/test/Preprocessor/macro_fn_comma_swallow.c new file mode 100644 index 0000000000..d4f3bb9940 --- /dev/null +++ b/test/Preprocessor/macro_fn_comma_swallow.c @@ -0,0 +1,16 @@ +// Test the GNU comma swallowing extension. +// RUN: clang %s -E | grep 'foo{A, }' && +// RUN: clang %s -E | grep 'fo2{A,}' && +// RUN: clang %s -E | grep '{foo}' + +#define X(Y) foo{A, Y} +X() + +#define X2(Y) fo2{A,##Y} +X2() + +// should eat the comma. +#define X3(b, ...) {b, ## __VA_ARGS__} +X3(foo) + + diff --git a/test/Preprocessor/macro_fn_disable_expand.c b/test/Preprocessor/macro_fn_disable_expand.c new file mode 100644 index 0000000000..a9e1d46f06 --- /dev/null +++ b/test/Preprocessor/macro_fn_disable_expand.c @@ -0,0 +1,11 @@ +// RUN: clang %s -E | grep 'bar foo (2)' && +// RUN: clang %s -E | grep 'm(ABCD)' + +#define foo(x) bar x +foo(foo) (2) + + +#define m(a) a(w) +#define w ABCD +m(m) // m(ABCD) + diff --git a/test/Preprocessor/macro_fn_lparen_scan.c b/test/Preprocessor/macro_fn_lparen_scan.c new file mode 100644 index 0000000000..497ef230d2 --- /dev/null +++ b/test/Preprocessor/macro_fn_lparen_scan.c @@ -0,0 +1,27 @@ +// RUN: clang -E %s | grep 'noexp: foo y' && +// RUN: clang -E %s | grep 'expand: abc' && +// RUN: clang -E %s | grep 'noexp2: foo nonexp' && +// RUN: clang -E %s | grep 'expand2: abc' + +#define A foo +#define foo() abc +#define X A y + +// This should not expand to abc, because the foo macro isn't followed by (. +noexp: X + + +// This should expand to abc. 
+#undef X +#define X A () +expand: X + + +// This should be 'foo nonexp' +noexp2: A nonexp + +// This should expand +expand2: A ( +) + + diff --git a/test/Preprocessor/macro_fn_lparen_scan2.c b/test/Preprocessor/macro_fn_lparen_scan2.c new file mode 100644 index 0000000000..fa4d5047fb --- /dev/null +++ b/test/Preprocessor/macro_fn_lparen_scan2.c @@ -0,0 +1,7 @@ +// RUN: clang -E %s | grep 'FUNC (3 +1);' + +#define F(a) a +#define FUNC(a) (a+1) + +F(FUNC) FUNC (3); /* final token sequence is FUNC(3+1) */ + diff --git a/test/Preprocessor/macro_fn_placemarker.c b/test/Preprocessor/macro_fn_placemarker.c new file mode 100644 index 0000000000..30c0bcf47f --- /dev/null +++ b/test/Preprocessor/macro_fn_placemarker.c @@ -0,0 +1,5 @@ +// RUN: clang %s -E | grep 'foo(A, )' + +#define X(Y) foo(A, Y) +X() + diff --git a/test/Preprocessor/macro_fn_preexpand.c b/test/Preprocessor/macro_fn_preexpand.c new file mode 100644 index 0000000000..81a7c41726 --- /dev/null +++ b/test/Preprocessor/macro_fn_preexpand.c @@ -0,0 +1,12 @@ +// RUN: clang %s -E | grep 'pre: 1 1 X' && +// RUN: clang %s -E | grep 'nopre: 1A(X)' + +/* Preexpansion of argument. */ +#define A(X) 1 X +pre: A(A(X)) + +/* The ## operator disables preexpansion. */ +#undef A +#define A(X) 1 ## X +nopre: A(A(X)) + diff --git a/test/Preprocessor/macro_fn_varargs_iso.c b/test/Preprocessor/macro_fn_varargs_iso.c new file mode 100644 index 0000000000..716e920336 --- /dev/null +++ b/test/Preprocessor/macro_fn_varargs_iso.c @@ -0,0 +1,11 @@ + +// RUN: clang -E %s | grep 'foo{a, b, c, d, e}' && +// RUN: clang -E %s | grep 'foo2{d, C, B}' && +// RUN: clang -E %s | grep 'foo2{d,e, C, B}' + +#define va1(...) foo{a, __VA_ARGS__, e} +va1(b, c, d) +#define va2(a, b, ...) 
foo2{__VA_ARGS__, b, a} +va2(B, C, d) +va2(B, C, d,e) + diff --git a/test/Preprocessor/macro_fn_varargs_named.c b/test/Preprocessor/macro_fn_varargs_named.c new file mode 100644 index 0000000000..75ee96105d --- /dev/null +++ b/test/Preprocessor/macro_fn_varargs_named.c @@ -0,0 +1,7 @@ +// RUN: clang -E %s | grep '^a: x$' && +// RUN: clang -E %s | grep '^b: x y, z,h$' + +#define A(b, c...) b c +a: A(x) +b: A(x, y, z,h) + diff --git a/test/Preprocessor/macro_not_define.c b/test/Preprocessor/macro_not_define.c new file mode 100644 index 0000000000..388481a90e --- /dev/null +++ b/test/Preprocessor/macro_not_define.c @@ -0,0 +1,9 @@ +// RUN: clang -E %s | grep '^ # define X 3$' + +#define H # + #define D define + + #define DEFINE(a, b) H D a b + + DEFINE(X, 3) + diff --git a/test/Preprocessor/macro_paste_bad.c b/test/Preprocessor/macro_paste_bad.c new file mode 100644 index 0000000000..60caa42743 --- /dev/null +++ b/test/Preprocessor/macro_paste_bad.c @@ -0,0 +1,5 @@ +// RUN: clang -Eonly %s 2>&1 | grep error +// pasting ""x"" and ""+"" does not give a valid preprocessing token +#define XYZ x ## + +XYZ + diff --git a/test/Preprocessor/macro_paste_bcpl_comment.c b/test/Preprocessor/macro_paste_bcpl_comment.c new file mode 100644 index 0000000000..9a864d520c --- /dev/null +++ b/test/Preprocessor/macro_paste_bcpl_comment.c @@ -0,0 +1,5 @@ +// RUN: clang %s -Eonly 2>&1 | grep error + +#define COMM1 / ## / +COMM1 + diff --git a/test/Preprocessor/macro_paste_c_block_comment.c b/test/Preprocessor/macro_paste_c_block_comment.c new file mode 100644 index 0000000000..9299514c51 --- /dev/null +++ b/test/Preprocessor/macro_paste_c_block_comment.c @@ -0,0 +1,7 @@ +// RUN: clang %s -Eonly 2>&1 | grep error && +// RUN: clang %s -Eonly 2>&1 | not grep unterminated && +// RUN: clang %s -Eonly 2>&1 | not grep scratch + +#define COMM / ## * +COMM + diff --git a/test/Preprocessor/macro_paste_empty.c b/test/Preprocessor/macro_paste_empty.c new file mode 100644 index 0000000000..8b78ecddd6 
--- /dev/null +++ b/test/Preprocessor/macro_paste_empty.c @@ -0,0 +1,13 @@ +// RUN: clang -E %s | grep 'a:Y' && +// RUN: clang -E %s | grep 'b:Y' && +// RUN: clang -E %s | grep 'c:YY' + +#define FOO(X) X ## Y +a:FOO() + +#define FOO2(X) Y ## X +b:FOO2() + +#define FOO3(X) X ## Y ## X ## Y ## X ## X +c:FOO3() + diff --git a/test/Preprocessor/macro_paste_hard.c b/test/Preprocessor/macro_paste_hard.c new file mode 100644 index 0000000000..be467456bf --- /dev/null +++ b/test/Preprocessor/macro_paste_hard.c @@ -0,0 +1,17 @@ +// RUN: clang -E %s | grep '1: aaab 2' && +// RUN: clang -E %s | grep '2: 2 baaa' && +// RUN: clang -E %s | grep '3: 2 xx' + +#define a(n) aaa ## n +#define b 2 +1: a(b b) // aaab 2 2 gets expanded, not b. + +#undef a +#undef b +#define a(n) n ## aaa +#define b 2 +2: a(b b) // 2 baaa 2 gets expanded, not b. + +#define baaa xx +3: a(b b) // 2 xx + diff --git a/test/Preprocessor/macro_paste_hashhash.c b/test/Preprocessor/macro_paste_hashhash.c new file mode 100644 index 0000000000..4ebf55e41f --- /dev/null +++ b/test/Preprocessor/macro_paste_hashhash.c @@ -0,0 +1,7 @@ +// RUN: clang -E %s | grep '^"x ## y";$' +#define hash_hash # ## # +#define mkstr(a) # a +#define in_between(a) mkstr(a) +#define join(c, d) in_between(c hash_hash d) +join(x, y); + diff --git a/test/Preprocessor/macro_paste_none.c b/test/Preprocessor/macro_paste_none.c new file mode 100644 index 0000000000..2ba2820b16 --- /dev/null +++ b/test/Preprocessor/macro_paste_none.c @@ -0,0 +1,6 @@ +// RUN: clang -E %s | grep '!!' + +#define A(B,C) B ## C + +!A(,)! 
+ diff --git a/test/Preprocessor/macro_paste_simple.c b/test/Preprocessor/macro_paste_simple.c new file mode 100644 index 0000000000..e8dc1e8404 --- /dev/null +++ b/test/Preprocessor/macro_paste_simple.c @@ -0,0 +1,3 @@ +// clang %s -E | grep "barbaz123" + +#define FOO bar ## baz ## 123 diff --git a/test/Preprocessor/macro_paste_spacing.c b/test/Preprocessor/macro_paste_spacing.c new file mode 100644 index 0000000000..471ebcc015 --- /dev/null +++ b/test/Preprocessor/macro_paste_spacing.c @@ -0,0 +1,7 @@ +// RUN: clang %s -E | grep "^xy$" + +#define A x ## y +blah + +A + diff --git a/test/Preprocessor/macro_rescan.c b/test/Preprocessor/macro_rescan.c new file mode 100644 index 0000000000..2ceb2923ca --- /dev/null +++ b/test/Preprocessor/macro_rescan.c @@ -0,0 +1,9 @@ +// RUN: clang -E %s | grep 'ei_1 = (17 +1);' && +// RUN: clang -E %s | grep 'ei_2 = (M1)(17);' + +#define M1(a) (a+1) +#define M2(b) b + +int ei_1 = M2(M1)(17); /* becomes int ei_1 = (17+1); */ +int ei_2 = (M2(M1))(17); /* becomes int ei_2 = (M1)(17); */ + diff --git a/test/Preprocessor/macro_rescan2.c b/test/Preprocessor/macro_rescan2.c new file mode 100644 index 0000000000..4fae444002 --- /dev/null +++ b/test/Preprocessor/macro_rescan2.c @@ -0,0 +1,15 @@ +// RUN: clang %s -E | grep 'a: 2\*f(9)' && +// RUN: clang %s -E | grep 'b: 2\*9\*g' + +#define f(a) a*g +#define g f +a: f(2)(9) + +#undef f +#undef g + +#define f(a) a*g +#define g(a) f(a) + +b: f(2)(9) + diff --git a/test/Preprocessor/macro_rescan_varargs.c b/test/Preprocessor/macro_rescan_varargs.c new file mode 100644 index 0000000000..3c79d0e993 --- /dev/null +++ b/test/Preprocessor/macro_rescan_varargs.c @@ -0,0 +1,10 @@ +// RUN: clang -E %s | grep -F "1: F, (, 'a', 'b', );" && +// RUN: clang -E %s | grep -F "2: 'a' + 'b';" +#define LPAREN ( +#define RPAREN ) +#define F(x, y) x + y +#define ELLIP_FUNC(...) 
__VA_ARGS__ + +1: ELLIP_FUNC(F, LPAREN, 'a', 'b', RPAREN); /* 1st invocation */ +2: ELLIP_FUNC(F LPAREN 'a', 'b' RPAREN); /* 2nd invocation */ + diff --git a/test/Preprocessor/macro_rparen_scan.c b/test/Preprocessor/macro_rparen_scan.c new file mode 100644 index 0000000000..d4e62837ae --- /dev/null +++ b/test/Preprocessor/macro_rparen_scan.c @@ -0,0 +1,8 @@ +// RUN: clang -E %s | grep '^3 ;$' + +/* Right paren scanning, hard case. Should expand to 3. */ +#define i(x) 3 +#define a i(yz +#define b ) +a b ) ; + diff --git a/test/Preprocessor/macro_rparen_scan2.c b/test/Preprocessor/macro_rparen_scan2.c new file mode 100644 index 0000000000..99545e780d --- /dev/null +++ b/test/Preprocessor/macro_rparen_scan2.c @@ -0,0 +1,8 @@ +// clang -E %s | grep -F 'static int glob = (1 + 1 );' + +#define R_PAREN ) + +#define FUNC(a) a + +static int glob = (1 + FUNC(1 R_PAREN ); + diff --git a/test/Preprocessor/macro_space.c b/test/Preprocessor/macro_space.c new file mode 100644 index 0000000000..553fddb68f --- /dev/null +++ b/test/Preprocessor/macro_space.c @@ -0,0 +1,5 @@ +// RUN: clang %s -E | grep '! ,' + +#define XX +! XX, + diff --git a/test/Preprocessor/output_paste_avoid.c b/test/Preprocessor/output_paste_avoid.c new file mode 100644 index 0000000000..842063a908 --- /dev/null +++ b/test/Preprocessor/output_paste_avoid.c @@ -0,0 +1,12 @@ +// RUN: clang -E %s | grep '+ + - - + + = = =' && +// RUN: clang -E %s | not grep -F '...' + +// This should print as ".. ." to avoid turning into ... +#define y(a) ..a +y(.) 
+ +#define PLUS + +#define EMPTY +#define f(x) =x= ++PLUS -EMPTY- PLUS+ f(=) + diff --git a/test/Preprocessor/paste_bad.c b/test/Preprocessor/paste_bad.c new file mode 100644 index 0000000000..89e879957e --- /dev/null +++ b/test/Preprocessor/paste_bad.c @@ -0,0 +1,17 @@ +// GCC PR 20077 +// RUN: not clang -E %s && +// RUN: not clang -E %s 2>&1 | grep error: | wc -l | grep 10 + +#define a a ## ## /* { dg-error "end of a macro expansion" } */ +#define b() b ## ## /* { dg-error "end of a macro expansion" } */ +#define c c ## /* { dg-error "end of a macro expansion" } */ +#define d() d ## /* { dg-error "end of a macro expansion" } */ + + +#define e ## ## e /* { dg-error "end of a macro expansion" } */ +#define f() ## ## f /* { dg-error "end of a macro expansion" } */ +#define g ## g /* { dg-error "end of a macro expansion" } */ +#define h() ## h /* { dg-error "end of a macro expansion" } */ +#define i ## /* { dg-error "end of a macro expansion" } */ +#define j() ## /* { dg-error "end of a macro expansion" } */ + diff --git a/test/Preprocessor/poison.c b/test/Preprocessor/poison.c new file mode 100644 index 0000000000..5df4b47918 --- /dev/null +++ b/test/Preprocessor/poison.c @@ -0,0 +1,4 @@ +// RUN: clang %s -E 2>&1 | grep error + +#pragma GCC poison rindex +rindex(some_string, 'h'); diff --git a/test/Preprocessor/poison_expansion.c b/test/Preprocessor/poison_expansion.c new file mode 100644 index 0000000000..3444bace4d --- /dev/null +++ b/test/Preprocessor/poison_expansion.c @@ -0,0 +1,9 @@ +// RUN: clang %s -E 2>&1 | not grep error + +#define strrchr rindex +#pragma GCC poison rindex + +// Can poison multiple times. 
+#pragma GCC poison rindex + +strrchr(some_string, 'h'); diff --git a/test/Preprocessor/pragma_unknown.c b/test/Preprocessor/pragma_unknown.c new file mode 100644 index 0000000000..ca2bea10e1 --- /dev/null +++ b/test/Preprocessor/pragma_unknown.c @@ -0,0 +1,6 @@ +// RUN: clang -E %s | grep '#pragma foo bar' + +// GCC doesn't expand macro args for unrecognized pragmas. +#define bar xX +#pragma foo bar + diff --git a/test/Preprocessor/stringize_misc.c b/test/Preprocessor/stringize_misc.c new file mode 100644 index 0000000000..b8e4480ef8 --- /dev/null +++ b/test/Preprocessor/stringize_misc.c @@ -0,0 +1,26 @@ +// RUN: clang -E %s | grep -F '"f(1, 2)" "g((x=y++, y))"' && +// RUN: clang -E %s | grep -F '"{a=1" "b=2;}"' && +// RUN: clang -E %s | grep -F '"<" "["' && +// RUN: clang -E %s | grep -F '"(,)" "(...)"' && +// RUN: clang -E %s | grep -F '{a=1 c=3; b=2;}' && +// RUN: clang -E %s | grep -F '"a COMMA b" "(a, b)"' + +#define M(x, y) #x #y + +M( f(1, 2), g((x=y++, y))) +M( {a=1 , b=2;} ) /* A semicolon is not a comma */ +M( <, [ ) /* Passes the arguments < and [ */ +M( (,), (...) ) /* Passes the arguments (,) and (...) */ + +#define START_END(start, end) start c=3; end + +START_END( {a=1 , b=2;} ) /* braces are not parentheses */ + +/* + * To pass a comma token as an argument it is + * necessary to write: + */ +#define COMMA , + +M(a COMMA b, (a, b)) + diff --git a/test/Preprocessor/stringize_space.c b/test/Preprocessor/stringize_space.c new file mode 100644 index 0000000000..8c83677d67 --- /dev/null +++ b/test/Preprocessor/stringize_space.c @@ -0,0 +1,4 @@ +// RUN: clang -E %s | grep -- '-"" , - "" , -"" , - ""' + +#define A(b) -#b , - #b , -# b , - # b +A() diff --git a/test/Sema/i-c-e1.c b/test/Sema/i-c-e1.c new file mode 100644 index 0000000000..cb4a9a33af --- /dev/null +++ b/test/Sema/i-c-e1.c @@ -0,0 +1,5 @@ +// RUN: clang %s -fsyntax-only + +void test1(int n, int* p) { *(n ? p : (void *)(7-7)) = 1; } +void test2(int n, int* p) { *(n ? 
p : (void *)0) = 1; } + diff --git a/test/Sema/implicit-def.c b/test/Sema/implicit-def.c new file mode 100644 index 0000000000..942f091d7b --- /dev/null +++ b/test/Sema/implicit-def.c @@ -0,0 +1,8 @@ +/* RUN: clang -parse-ast %s -std=c89 && + * RUN: not clang -parse-ast %s -std=c99 -pedantic-errors + */ + +int A() { + return X(); +} + diff --git a/test/Sema/unused-expr.c b/test/Sema/unused-expr.c new file mode 100644 index 0000000000..667f286f51 --- /dev/null +++ b/test/Sema/unused-expr.c @@ -0,0 +1,26 @@ +// RUN: clang -parse-ast-check %s + +int foo(int X, int Y); + +void bar(volatile int *VP, int *P, int A, + _Complex double C, volatile _Complex double VC) { + + VP == P; // expected-warning {{expression result unused}} + (void)A; // expected-warning {{expression result unused}} + (void)foo(1,2); // no warning. + + A == foo(1, 2); // expected-warning {{expression result unused}} + + foo(1,2)+foo(4,3); // expected-warning {{expression result unused}} + + + *P; // expected-warning {{expression result unused}} + *VP; // no warning. + P[4]; // expected-warning {{expression result unused}} + VP[4]; // no warning. + + // FIXME: SEMA explodes on these. + //__real__ C; + //__real__ VC; +} + diff --git a/test/Sema/void_arg.c b/test/Sema/void_arg.c new file mode 100644 index 0000000000..b390f59728 --- /dev/null +++ b/test/Sema/void_arg.c @@ -0,0 +1,21 @@ +/* RUN: clang -parse-ast %s 2>&1 | grep '6 diagnostics' + */ + +typedef void Void; + +void foo() { + int X; + + X = sizeof(int (void a)); + X = sizeof(int (int, void)); + X = sizeof(int (void, ...)); + + X = sizeof(int (Void a)); + X = sizeof(int (int, Void)); + X = sizeof(int (Void, ...)); + + // Accept these. + X = sizeof(int (void)); + X = sizeof(int (Void)); +} + diff --git a/test/TestRunner.sh b/test/TestRunner.sh new file mode 100755 index 0000000000..2c96fd5b48 --- /dev/null +++ b/test/TestRunner.sh @@ -0,0 +1,50 @@ +#!/bin/sh +# +# TestRunner.sh - This script is used to run arbitrary unit tests. 
Unit +# tests must contain the command used to run them in the input file, starting +# immediately after a "RUN:" string. +# +# This runner recognizes and replaces the following strings in the command: +# +# %s - Replaced with the input name of the program, or the program to +# execute, as appropriate. +# %llvmgcc - llvm-gcc command +# %llvmgxx - llvm-g++ command +# %prcontext - prcontext.tcl script +# + +FILENAME=$1 +TESTNAME=$1 +SUBST=$1 +FILENAME_ONLY=`basename $1` +OUTPUT=Output/$FILENAME_ONLY.out + +# create the output directory if it does not already exist +mkdir Output > /dev/null 2>&1 + +if test $# != 1; then + # If more than one parameter is passed in, there must be three parameters: + # The filename to read from (already processed), the command used to execute, + # and the file to output to. + SUBST=$2 + OUTPUT=$3 + TESTNAME=$3 +fi + +ulimit -t 40 + +SCRIPT=$OUTPUT.script +grep 'RUN:' $FILENAME | sed "s|^.*RUN:\(.*\)$|\1|g;s|%s|$SUBST|g;s|%llvmgcc|llvm-gcc -emit-llvm|g;s|%llvmgxx|llvm-g++ -emit-llvm|g;s|%prcontext|prcontext.tcl|g" > $SCRIPT + +grep -q XFAIL $FILENAME && (printf "XFAILED '$TESTNAME': "; grep XFAIL $FILENAME) + +/bin/sh $SCRIPT > $OUTPUT 2>&1 || ( + echo "******************** TEST '$TESTNAME' FAILED! ********************" + echo "Command: " + cat $SCRIPT + echo "Output:" + cat $OUTPUT + rm $OUTPUT + echo "******************** TEST '$TESTNAME' FAILED! ********************" +) + |