Diffstat (limited to 'lib/Frontend')
-rw-r--r--  lib/Frontend/CacheTokens.cpp                658
-rw-r--r--  lib/Frontend/DependencyFile.cpp             171
-rw-r--r--  lib/Frontend/DiagChecker.cpp                301
-rw-r--r--  lib/Frontend/PrintParserCallbacks.cpp       829
-rw-r--r--  lib/Frontend/PrintPreprocessedOutput.cpp    467
-rw-r--r--  lib/Frontend/RewriteMacros.cpp              215
-rw-r--r--  lib/Frontend/RewriteTest.cpp                 39
-rw-r--r--  lib/Frontend/Warnings.cpp                   106
8 files changed, 2786 insertions, 0 deletions
diff --git a/lib/Frontend/CacheTokens.cpp b/lib/Frontend/CacheTokens.cpp
new file mode 100644
index 0000000000..0065828c6d
--- /dev/null
+++ b/lib/Frontend/CacheTokens.cpp
@@ -0,0 +1,658 @@
+//===--- CacheTokens.cpp - Caching of lexer tokens for PTH support --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This provides a possible implementation of PTH support for Clang that is
+// based on caching lexed tokens and identifiers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Frontend/Utils.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/OnDiskHashTable.h"
+#include "clang/Lex/Lexer.h"
+#include "clang/Lex/Preprocessor.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/System/Path.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Streams.h"
+
+// FIXME: put this somewhere else?
+#ifndef S_ISDIR
+#define S_ISDIR(x) (((x)&_S_IFDIR)!=0)
+#endif
+
+using namespace clang;
+using namespace clang::io;
+
+//===----------------------------------------------------------------------===//
+// PTH-specific stuff.
+//===----------------------------------------------------------------------===//
+
+namespace {
+class VISIBILITY_HIDDEN PTHEntry {
+ Offset TokenData, PPCondData;
+
+public:
+ PTHEntry() {}
+
+ PTHEntry(Offset td, Offset ppcd)
+ : TokenData(td), PPCondData(ppcd) {}
+
+ Offset getTokenOffset() const { return TokenData; }
+ Offset getPPCondTableOffset() const { return PPCondData; }
+};
+
+
+class VISIBILITY_HIDDEN PTHEntryKeyVariant {
+ union { const FileEntry* FE; const char* Path; };
+ enum { IsFE = 0x1, IsDE = 0x2, IsNoExist = 0x0 } Kind;
+ struct stat *StatBuf;
+public:
+ PTHEntryKeyVariant(const FileEntry *fe)
+ : FE(fe), Kind(IsFE), StatBuf(0) {}
+
+ PTHEntryKeyVariant(struct stat* statbuf, const char* path)
+ : Path(path), Kind(IsDE), StatBuf(new struct stat(*statbuf)) {}
+
+ PTHEntryKeyVariant(const char* path)
+ : Path(path), Kind(IsNoExist), StatBuf(0) {}
+
+ bool isFile() const { return Kind == IsFE; }
+
+ const char* getCString() const {
+ return Kind == IsFE ? FE->getName() : Path;
+ }
+
+ unsigned getKind() const { return (unsigned) Kind; }
+
+ void EmitData(llvm::raw_ostream& Out) {
+ switch (Kind) {
+ case IsFE:
+ // Emit stat information.
+ ::Emit32(Out, FE->getInode());
+ ::Emit32(Out, FE->getDevice());
+ ::Emit16(Out, FE->getFileMode());
+ ::Emit64(Out, FE->getModificationTime());
+ ::Emit64(Out, FE->getSize());
+ break;
+ case IsDE:
+ // Emit stat information.
+ ::Emit32(Out, (uint32_t) StatBuf->st_ino);
+ ::Emit32(Out, (uint32_t) StatBuf->st_dev);
+ ::Emit16(Out, (uint16_t) StatBuf->st_mode);
+ ::Emit64(Out, (uint64_t) StatBuf->st_mtime);
+ ::Emit64(Out, (uint64_t) StatBuf->st_size);
+ delete StatBuf;
+ break;
+ default:
+ break;
+ }
+ }
+
+ unsigned getRepresentationLength() const {
+ return Kind == IsNoExist ? 0 : 4 + 4 + 2 + 8 + 8;
+ }
+};
+
+class VISIBILITY_HIDDEN FileEntryPTHEntryInfo {
+public:
+ typedef PTHEntryKeyVariant key_type;
+ typedef key_type key_type_ref;
+
+ typedef PTHEntry data_type;
+ typedef const PTHEntry& data_type_ref;
+
+ static unsigned ComputeHash(PTHEntryKeyVariant V) {
+ return BernsteinHash(V.getCString());
+ }
+
+ static std::pair<unsigned,unsigned>
+ EmitKeyDataLength(llvm::raw_ostream& Out, PTHEntryKeyVariant V,
+ const PTHEntry& E) {
+
+ unsigned n = strlen(V.getCString()) + 1 + 1;
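+    // One byte for the entry kind emitted in EmitKey, plus the string and
+    // its nul terminator.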
+ ::Emit16(Out, n);
+
+ unsigned m = V.getRepresentationLength() + (V.isFile() ? 4 + 4 : 0);
+ ::Emit8(Out, m);
+
+ return std::make_pair(n, m);
+ }
+
+ static void EmitKey(llvm::raw_ostream& Out, PTHEntryKeyVariant V, unsigned n){
+ // Emit the entry kind.
+ ::Emit8(Out, (unsigned) V.getKind());
+ // Emit the string.
+ Out.write(V.getCString(), n - 1);
+ }
+
+ static void EmitData(llvm::raw_ostream& Out, PTHEntryKeyVariant V,
+ const PTHEntry& E, unsigned) {
+
+
+ // For file entries emit the offsets into the PTH file for token data
+ // and the preprocessor blocks table.
+ if (V.isFile()) {
+ ::Emit32(Out, E.getTokenOffset());
+ ::Emit32(Out, E.getPPCondTableOffset());
+ }
+
+ // Emit any other data associated with the key (i.e., stat information).
+ V.EmitData(Out);
+ }
+};
+
+class OffsetOpt {
+ bool valid;
+ Offset off;
+public:
+ OffsetOpt() : valid(false) {}
+ bool hasOffset() const { return valid; }
+ Offset getOffset() const { assert(valid); return off; }
+ void setOffset(Offset o) { off = o; valid = true; }
+};
+} // end anonymous namespace
+
+typedef OnDiskChainedHashTableGenerator<FileEntryPTHEntryInfo> PTHMap;
+typedef llvm::DenseMap<const IdentifierInfo*,uint32_t> IDMap;
+typedef llvm::StringMap<OffsetOpt, llvm::BumpPtrAllocator> CachedStrsTy;
+
+namespace {
+class VISIBILITY_HIDDEN PTHWriter {
+ IDMap IM;
+ llvm::raw_fd_ostream& Out;
+ Preprocessor& PP;
+ uint32_t idcount;
+ PTHMap PM;
+ CachedStrsTy CachedStrs;
+ Offset CurStrOffset;
+ std::vector<llvm::StringMapEntry<OffsetOpt>*> StrEntries;
+
+  /// Get the persistent id for the given IdentifierInfo*.
+ uint32_t ResolveID(const IdentifierInfo* II);
+
+ /// Emit a token to the PTH file.
+ void EmitToken(const Token& T);
+
+ void Emit8(uint32_t V) {
+ Out << (unsigned char)(V);
+ }
+
+ void Emit16(uint32_t V) { ::Emit16(Out, V); }
+
+ void Emit24(uint32_t V) {
+ Out << (unsigned char)(V);
+ Out << (unsigned char)(V >> 8);
+ Out << (unsigned char)(V >> 16);
+ assert((V >> 24) == 0);
+ }
+
+ void Emit32(uint32_t V) { ::Emit32(Out, V); }
+
+ void EmitBuf(const char *Ptr, unsigned NumBytes) {
+ Out.write(Ptr, NumBytes);
+ }
+
+ /// EmitIdentifierTable - Emits two tables to the PTH file. The first is
+ /// a hashtable mapping from identifier strings to persistent IDs.
+ /// The second is a straight table mapping from persistent IDs to string data
+ /// (the keys of the first table).
+ std::pair<Offset, Offset> EmitIdentifierTable();
+
+ /// EmitFileTable - Emit a table mapping from file name strings to PTH
+ /// token data.
+ Offset EmitFileTable() { return PM.Emit(Out); }
+
+ PTHEntry LexTokens(Lexer& L);
+ Offset EmitCachedSpellings();
+
+public:
+ PTHWriter(llvm::raw_fd_ostream& out, Preprocessor& pp)
+ : Out(out), PP(pp), idcount(0), CurStrOffset(0) {}
+
+ PTHMap &getPM() { return PM; }
+ void GeneratePTH(const std::string *MainFile = 0);
+};
+} // end anonymous namespace
+
+uint32_t PTHWriter::ResolveID(const IdentifierInfo* II) {
+ // Null IdentifierInfo's map to the persistent ID 0.
+ if (!II)
+ return 0;
+
+ IDMap::iterator I = IM.find(II);
+ if (I != IM.end())
+ return I->second; // We've already added 1.
+
+ IM[II] = ++idcount; // Pre-increment since '0' is reserved for NULL.
+ return idcount;
+}
+
+void PTHWriter::EmitToken(const Token& T) {
+ // Emit the token kind, flags, and length.
+ Emit32(((uint32_t) T.getKind()) | ((((uint32_t) T.getFlags())) << 8)|
+ (((uint32_t) T.getLength()) << 16));
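+  // A rough illustration (values not taken from an actual run): a token of
+  // length 3 with no flags set packs as (3 << 16) | (0 << 8) | kind, so a
+  // reader can unpack the kind, flags, and length from this single word.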
+
+ if (!T.isLiteral()) {
+ Emit32(ResolveID(T.getIdentifierInfo()));
+ } else {
+ // We cache *un-cleaned* spellings. This gives us 100% fidelity with the
+ // source code.
+ const char* s = T.getLiteralData();
+ unsigned len = T.getLength();
+
+ // Get the string entry.
+ llvm::StringMapEntry<OffsetOpt> *E = &CachedStrs.GetOrCreateValue(s, s+len);
+
+ // If this is a new string entry, bump the PTH offset.
+ if (!E->getValue().hasOffset()) {
+ E->getValue().setOffset(CurStrOffset);
+ StrEntries.push_back(E);
+ CurStrOffset += len + 1;
+ }
+
+ // Emit the relative offset into the PTH file for the spelling string.
+ Emit32(E->getValue().getOffset());
+ }
+
+ // Emit the offset into the original source file of this token so that we
+ // can reconstruct its SourceLocation.
+ Emit32(PP.getSourceManager().getFileOffset(T.getLocation()));
+}
+
+PTHEntry PTHWriter::LexTokens(Lexer& L) {
+  // Pad with 0's so that we emit tokens at a 4-byte alignment.
+  // This speeds up reading them back in.
+ Pad(Out, 4);
+ Offset off = (Offset) Out.tell();
+
+ // Keep track of matching '#if' ... '#endif'.
+ typedef std::vector<std::pair<Offset, unsigned> > PPCondTable;
+ PPCondTable PPCond;
+ std::vector<unsigned> PPStartCond;
+ bool ParsingPreprocessorDirective = false;
+ Token Tok;
+
+ do {
+ L.LexFromRawLexer(Tok);
+ NextToken:
+
+ if ((Tok.isAtStartOfLine() || Tok.is(tok::eof)) &&
+ ParsingPreprocessorDirective) {
+ // Insert an eom token into the token cache. It has the same
+ // position as the next token that is not on the same line as the
+ // preprocessor directive. Observe that we continue processing
+ // 'Tok' when we exit this branch.
+ Token Tmp = Tok;
+ Tmp.setKind(tok::eom);
+ Tmp.clearFlag(Token::StartOfLine);
+ Tmp.setIdentifierInfo(0);
+ EmitToken(Tmp);
+ ParsingPreprocessorDirective = false;
+ }
+
+ if (Tok.is(tok::identifier)) {
+ Tok.setIdentifierInfo(PP.LookUpIdentifierInfo(Tok));
+ EmitToken(Tok);
+ continue;
+ }
+
+ if (Tok.is(tok::hash) && Tok.isAtStartOfLine()) {
+ // Special processing for #include. Store the '#' token and lex
+ // the next token.
+ assert(!ParsingPreprocessorDirective);
+ Offset HashOff = (Offset) Out.tell();
+ EmitToken(Tok);
+
+ // Get the next token.
+ L.LexFromRawLexer(Tok);
+
+      // If the next token is at the start of a line, we had a null directive "#".
+ if (Tok.isAtStartOfLine())
+ goto NextToken;
+
+ // Did we see 'include'/'import'/'include_next'?
+ if (Tok.isNot(tok::identifier)) {
+ EmitToken(Tok);
+ continue;
+ }
+
+ IdentifierInfo* II = PP.LookUpIdentifierInfo(Tok);
+ Tok.setIdentifierInfo(II);
+ tok::PPKeywordKind K = II->getPPKeywordID();
+
+ ParsingPreprocessorDirective = true;
+
+ switch (K) {
+ case tok::pp_not_keyword:
+        // Invalid directives "#foo" can occur in #if 0 blocks etc.; just
+        // pass them through.
+ default:
+ break;
+
+ case tok::pp_include:
+ case tok::pp_import:
+ case tok::pp_include_next: {
+ // Save the 'include' token.
+ EmitToken(Tok);
+ // Lex the next token as an include string.
+ L.setParsingPreprocessorDirective(true);
+ L.LexIncludeFilename(Tok);
+ L.setParsingPreprocessorDirective(false);
+ assert(!Tok.isAtStartOfLine());
+ if (Tok.is(tok::identifier))
+ Tok.setIdentifierInfo(PP.LookUpIdentifierInfo(Tok));
+
+ break;
+ }
+ case tok::pp_if:
+ case tok::pp_ifdef:
+ case tok::pp_ifndef: {
+ // Add an entry for '#if' and friends. We initially set the target
+ // index to 0. This will get backpatched when we hit #endif.
+ PPStartCond.push_back(PPCond.size());
+ PPCond.push_back(std::make_pair(HashOff, 0U));
+ break;
+ }
+ case tok::pp_endif: {
+ // Add an entry for '#endif'. We set the target table index to itself.
+ // This will later be set to zero when emitting to the PTH file. We
+ // use 0 for uninitialized indices because that is easier to debug.
+ unsigned index = PPCond.size();
+ // Backpatch the opening '#if' entry.
+ assert(!PPStartCond.empty());
+ assert(PPCond.size() > PPStartCond.back());
+ assert(PPCond[PPStartCond.back()].second == 0);
+ PPCond[PPStartCond.back()].second = index;
+ PPStartCond.pop_back();
+ // Add the new entry to PPCond.
+ PPCond.push_back(std::make_pair(HashOff, index));
+ EmitToken(Tok);
+
+ // Some files have gibberish on the same line as '#endif'.
+ // Discard these tokens.
+ do
+ L.LexFromRawLexer(Tok);
+ while (Tok.isNot(tok::eof) && !Tok.isAtStartOfLine());
+ // We have the next token in hand.
+ // Don't immediately lex the next one.
+ goto NextToken;
+ }
+ case tok::pp_elif:
+ case tok::pp_else: {
+ // Add an entry for #elif or #else.
+ // This serves as both a closing and opening of a conditional block.
+ // This means that its entry will get backpatched later.
+ unsigned index = PPCond.size();
+ // Backpatch the previous '#if' entry.
+ assert(!PPStartCond.empty());
+ assert(PPCond.size() > PPStartCond.back());
+ assert(PPCond[PPStartCond.back()].second == 0);
+ PPCond[PPStartCond.back()].second = index;
+ PPStartCond.pop_back();
+ // Now add '#elif' as a new block opening.
+ PPCond.push_back(std::make_pair(HashOff, 0U));
+ PPStartCond.push_back(index);
+ break;
+ }
+ }
+ }
+
+ EmitToken(Tok);
+ }
+ while (Tok.isNot(tok::eof));
+
+  assert(PPStartCond.empty() && "Error: imbalanced preprocessor conditionals.");
+
+ // Next write out PPCond.
+ Offset PPCondOff = (Offset) Out.tell();
+
+  // Write out the size of PPCond so that clients can identify empty tables.
+ Emit32(PPCond.size());
+
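+  // Each entry below is two 32-bit words: the '#' token's offset relative to
+  // the start of this file's token data, and the index of the next entry in
+  // the same conditional chain (emitted as 0 for '#endif' entries).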
+ for (unsigned i = 0, e = PPCond.size(); i!=e; ++i) {
+ Emit32(PPCond[i].first - off);
+ uint32_t x = PPCond[i].second;
+ assert(x != 0 && "PPCond entry not backpatched.");
+ // Emit zero for #endifs. This allows us to do checking when
+ // we read the PTH file back in.
+ Emit32(x == i ? 0 : x);
+ }
+
+ return PTHEntry(off, PPCondOff);
+}
+
+Offset PTHWriter::EmitCachedSpellings() {
+  // Write each cached string to the PTH file.
+ Offset SpellingsOff = Out.tell();
+
+ for (std::vector<llvm::StringMapEntry<OffsetOpt>*>::iterator
+ I = StrEntries.begin(), E = StrEntries.end(); I!=E; ++I)
+ EmitBuf((*I)->getKeyData(), (*I)->getKeyLength()+1 /*nul included*/);
+
+ return SpellingsOff;
+}
+
+void PTHWriter::GeneratePTH(const std::string *MainFile) {
+ // Generate the prologue.
+ Out << "cfe-pth";
+ Emit32(PTHManager::Version);
+
+ // Leave 4 words for the prologue.
+ Offset PrologueOffset = Out.tell();
+ for (unsigned i = 0; i < 4; ++i)
+ Emit32(0);
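+  // (These four placeholder words are backpatched at the end of this
+  // function with the two identifier-table offsets, the file-table offset,
+  // and the cached-spellings offset.)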
+
+ // Write the name of the MainFile.
+ if (MainFile && !MainFile->empty()) {
+ Emit16(MainFile->length());
+ EmitBuf(MainFile->data(), MainFile->length());
+ } else {
+ // String with 0 bytes.
+ Emit16(0);
+ }
+ Emit8(0);
+
+ // Iterate over all the files in SourceManager. Create a lexer
+ // for each file and cache the tokens.
+ SourceManager &SM = PP.getSourceManager();
+ const LangOptions &LOpts = PP.getLangOptions();
+
+ for (SourceManager::fileinfo_iterator I = SM.fileinfo_begin(),
+ E = SM.fileinfo_end(); I != E; ++I) {
+ const SrcMgr::ContentCache &C = *I->second;
+ const FileEntry *FE = C.Entry;
+
+ // FIXME: Handle files with non-absolute paths.
+ llvm::sys::Path P(FE->getName());
+ if (!P.isAbsolute())
+ continue;
+
+ const llvm::MemoryBuffer *B = C.getBuffer();
+ if (!B) continue;
+
+ FileID FID = SM.createFileID(FE, SourceLocation(), SrcMgr::C_User);
+ Lexer L(FID, SM, LOpts);
+ PM.insert(FE, LexTokens(L));
+ }
+
+ // Write out the identifier table.
+ const std::pair<Offset,Offset> &IdTableOff = EmitIdentifierTable();
+
+ // Write out the cached strings table.
+ Offset SpellingOff = EmitCachedSpellings();
+
+ // Write out the file table.
+ Offset FileTableOff = EmitFileTable();
+
+ // Finally, write the prologue.
+ Out.seek(PrologueOffset);
+ Emit32(IdTableOff.first);
+ Emit32(IdTableOff.second);
+ Emit32(FileTableOff);
+ Emit32(SpellingOff);
+}
+
+namespace {
+/// StatListener - A simple "interpose" object used to monitor stat calls
+/// invoked by FileManager while processing the original sources used
+/// as input to PTH generation. StatListener populates the PTHWriter's
+/// file map with stat information for directories as well as negative stats.
+/// Stat information for files is populated elsewhere.
+class StatListener : public StatSysCallCache {
+ PTHMap &PM;
+public:
+ StatListener(PTHMap &pm) : PM(pm) {}
+ ~StatListener() {}
+
+ int stat(const char *path, struct stat *buf) {
+ int result = ::stat(path, buf);
+
+ if (result != 0) // Failed 'stat'.
+ PM.insert(path, PTHEntry());
+ else if (S_ISDIR(buf->st_mode)) {
+ // Only cache directories with absolute paths.
+ if (!llvm::sys::Path(path).isAbsolute())
+ return result;
+
+ PM.insert(PTHEntryKeyVariant(buf, path), PTHEntry());
+ }
+
+ return result;
+ }
+};
+} // end anonymous namespace
+
+
+void clang::CacheTokens(Preprocessor &PP, llvm::raw_fd_ostream* OS) {
+ // Get the name of the main file.
+ const SourceManager &SrcMgr = PP.getSourceManager();
+ const FileEntry *MainFile = SrcMgr.getFileEntryForID(SrcMgr.getMainFileID());
+ llvm::sys::Path MainFilePath(MainFile->getName());
+ std::string MainFileName;
+
+ if (!MainFilePath.isAbsolute()) {
+ llvm::sys::Path P = llvm::sys::Path::GetCurrentDirectory();
+ P.appendComponent(MainFilePath.toString());
+ MainFileName = P.toString();
+ } else {
+ MainFileName = MainFilePath.toString();
+ }
+
+ // Create the PTHWriter.
+ PTHWriter PW(*OS, PP);
+
+ // Install the 'stat' system call listener in the FileManager.
+ PP.getFileManager().setStatCache(new StatListener(PW.getPM()));
+
+ // Lex through the entire file. This will populate SourceManager with
+ // all of the header information.
+ Token Tok;
+ PP.EnterMainSourceFile();
+ do { PP.Lex(Tok); } while (Tok.isNot(tok::eof));
+
+ // Generate the PTH file.
+ PP.getFileManager().setStatCache(0);
+ PW.GeneratePTH(&MainFileName);
+}
+
+//===----------------------------------------------------------------------===//
+
+class PTHIdKey {
+public:
+ const IdentifierInfo* II;
+ uint32_t FileOffset;
+};
+
+namespace {
+class VISIBILITY_HIDDEN PTHIdentifierTableTrait {
+public:
+ typedef PTHIdKey* key_type;
+ typedef key_type key_type_ref;
+
+ typedef uint32_t data_type;
+ typedef data_type data_type_ref;
+
+ static unsigned ComputeHash(PTHIdKey* key) {
+ return BernsteinHash(key->II->getName());
+ }
+
+ static std::pair<unsigned,unsigned>
+ EmitKeyDataLength(llvm::raw_ostream& Out, const PTHIdKey* key, uint32_t) {
+ unsigned n = strlen(key->II->getName()) + 1;
+ ::Emit16(Out, n);
+ return std::make_pair(n, sizeof(uint32_t));
+ }
+
+ static void EmitKey(llvm::raw_ostream& Out, PTHIdKey* key, unsigned n) {
+ // Record the location of the key data. This is used when generating
+ // the mapping from persistent IDs to strings.
+ key->FileOffset = Out.tell();
+ Out.write(key->II->getName(), n);
+ }
+
+ static void EmitData(llvm::raw_ostream& Out, PTHIdKey*, uint32_t pID,
+ unsigned) {
+ ::Emit32(Out, pID);
+ }
+};
+} // end anonymous namespace
+
+/// EmitIdentifierTable - Emits two tables to the PTH file. The first is
+/// a hashtable mapping from identifier strings to persistent IDs. The second
+/// is a straight table mapping from persistent IDs to string data (the
+/// keys of the first table).
+///
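+/// Roughly, as emitted below: the first table is an on-disk chained hash
+/// table keyed by the identifier string; the second is a count followed by
+/// 'idcount' 32-bit file offsets, indexed by persistent ID minus one, each
+/// pointing at the corresponding key string inside the first table.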
+std::pair<Offset,Offset> PTHWriter::EmitIdentifierTable() {
+ // Build two maps:
+ // (1) an inverse map from persistent IDs -> (IdentifierInfo*,Offset)
+ // (2) a map from (IdentifierInfo*, Offset)* -> persistent IDs
+
+ // Note that we use 'calloc', so all the bytes are 0.
+ PTHIdKey *IIDMap = (PTHIdKey*)calloc(idcount, sizeof(PTHIdKey));
+
+ // Create the hashtable.
+ OnDiskChainedHashTableGenerator<PTHIdentifierTableTrait> IIOffMap;
+
+ // Generate mapping from persistent IDs -> IdentifierInfo*.
+ for (IDMap::iterator I = IM.begin(), E = IM.end(); I != E; ++I) {
+ // Decrement by 1 because we are using a vector for the lookup and
+ // 0 is reserved for NULL.
+ assert(I->second > 0);
+ assert(I->second-1 < idcount);
+ unsigned idx = I->second-1;
+
+ // Store the mapping from persistent ID to IdentifierInfo*
+ IIDMap[idx].II = I->first;
+
+ // Store the reverse mapping in a hashtable.
+ IIOffMap.insert(&IIDMap[idx], I->second);
+ }
+
+  // Write out the inverse map first. This causes the PTHIdKey entries to
+ // record PTH file offsets for the string data. This is used to write
+ // the second table.
+ Offset StringTableOffset = IIOffMap.Emit(Out);
+
+ // Now emit the table mapping from persistent IDs to PTH file offsets.
+ Offset IDOff = Out.tell();
+ Emit32(idcount); // Emit the number of identifiers.
+ for (unsigned i = 0 ; i < idcount; ++i)
+ Emit32(IIDMap[i].FileOffset);
+
+ // Finally, release the inverse map.
+ free(IIDMap);
+
+ return std::make_pair(IDOff, StringTableOffset);
+}
diff --git a/lib/Frontend/DependencyFile.cpp b/lib/Frontend/DependencyFile.cpp
new file mode 100644
index 0000000000..4cee280ebf
--- /dev/null
+++ b/lib/Frontend/DependencyFile.cpp
@@ -0,0 +1,171 @@
+//===--- DependencyFile.cpp - Generate dependency file --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This code generates dependency files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Frontend/Utils.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/PPCallbacks.h"
+#include "clang/Lex/DirectoryLookup.h"
+#include "clang/Basic/SourceLocation.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/System/Path.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
+#include <string>
+
+using namespace clang;
+
+namespace {
+class VISIBILITY_HIDDEN DependencyFileCallback : public PPCallbacks {
+ std::vector<std::string> Files;
+ llvm::StringSet<> FilesSet;
+ const Preprocessor *PP;
+ std::vector<std::string> Targets;
+ llvm::raw_ostream *OS;
+ bool IncludeSystemHeaders;
+ bool PhonyTarget;
+private:
+ bool FileMatchesDepCriteria(const char *Filename,
+ SrcMgr::CharacteristicKind FileType);
+ void OutputDependencyFile();
+
+public:
+ DependencyFileCallback(const Preprocessor *_PP,
+ llvm::raw_ostream *_OS,
+ const std::vector<std::string> &_Targets,
+ bool _IncludeSystemHeaders,
+ bool _PhonyTarget)
+ : PP(_PP), Targets(_Targets), OS(_OS),
+ IncludeSystemHeaders(_IncludeSystemHeaders), PhonyTarget(_PhonyTarget) {}
+
+ ~DependencyFileCallback() {
+ OutputDependencyFile();
+ OS->flush();
+ delete OS;
+ }
+
+ virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason,
+ SrcMgr::CharacteristicKind FileType);
+};
+}
+
+
+
+void clang::AttachDependencyFileGen(Preprocessor *PP, llvm::raw_ostream *OS,
+ std::vector<std::string> &Targets,
+ bool IncludeSystemHeaders,
+ bool PhonyTarget) {
+ assert(!Targets.empty() && "Target required for dependency generation");
+
+ DependencyFileCallback *PPDep =
+ new DependencyFileCallback(PP, OS, Targets, IncludeSystemHeaders,
+ PhonyTarget);
+ PP->setPPCallbacks(PPDep);
+}
+
+/// FileMatchesDepCriteria - Determine whether the given Filename should be
+/// considered as a dependency.
+bool DependencyFileCallback::FileMatchesDepCriteria(const char *Filename,
+ SrcMgr::CharacteristicKind FileType) {
+ if (strcmp("<built-in>", Filename) == 0)
+ return false;
+
+ if (IncludeSystemHeaders)
+ return true;
+
+ return FileType == SrcMgr::C_User;
+}
+
+void DependencyFileCallback::FileChanged(SourceLocation Loc,
+ FileChangeReason Reason,
+ SrcMgr::CharacteristicKind FileType) {
+ if (Reason != PPCallbacks::EnterFile)
+ return;
+
+ // Dependency generation really does want to go all the way to the
+ // file entry for a source location to find out what is depended on.
+ // We do not want #line markers to affect dependency generation!
+ SourceManager &SM = PP->getSourceManager();
+
+ const FileEntry *FE =
+ SM.getFileEntryForID(SM.getFileID(SM.getInstantiationLoc(Loc)));
+ if (FE == 0) return;
+
+ const char *Filename = FE->getName();
+ if (!FileMatchesDepCriteria(Filename, FileType))
+ return;
+
+ // Remove leading "./"
+ if (Filename[0] == '.' && Filename[1] == '/')
+ Filename = &Filename[2];
+
+ if (FilesSet.insert(Filename))
+ Files.push_back(Filename);
+}
+
+void DependencyFileCallback::OutputDependencyFile() {
+ // Write out the dependency targets, trying to avoid overly long
+ // lines when possible. We try our best to emit exactly the same
+ // dependency file as GCC (4.2), assuming the included files are the
+ // same.
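+  // For illustration only (the names here are made up), the output looks
+  // roughly like:
+  //   foo.o: foo.c foo.h \
+  //     bar.h
+  // with one "<file>:" phony rule per dependency (excluding the input file)
+  // appended when phony targets are requested.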
+ const unsigned MaxColumns = 75;
+ unsigned Columns = 0;
+
+ for (std::vector<std::string>::iterator
+ I = Targets.begin(), E = Targets.end(); I != E; ++I) {
+ unsigned N = I->length();
+ if (Columns == 0) {
+ Columns += N;
+ *OS << *I;
+ } else if (Columns + N + 2 > MaxColumns) {
+ Columns = N + 2;
+ *OS << " \\\n " << *I;
+ } else {
+ Columns += N + 1;
+ *OS << ' ' << *I;
+ }
+ }
+
+ *OS << ':';
+ Columns += 1;
+
+ // Now add each dependency in the order it was seen, but avoiding
+ // duplicates.
+ for (std::vector<std::string>::iterator I = Files.begin(),
+ E = Files.end(); I != E; ++I) {
+ // Start a new line if this would exceed the column limit. Make
+ // sure to leave space for a trailing " \" in case we need to
+ // break the line on the next iteration.
+ unsigned N = I->length();
+ if (Columns + (N + 1) + 2 > MaxColumns) {
+ *OS << " \\\n ";
+ Columns = 2;
+ }
+ *OS << ' ' << *I;
+ Columns += N + 1;
+ }
+ *OS << '\n';
+
+ // Create phony targets if requested.
+ if (PhonyTarget) {
+    // Skip the first entry; it is always the input file itself.
+ for (std::vector<std::string>::iterator I = Files.begin() + 1,
+ E = Files.end(); I != E; ++I) {
+ *OS << '\n';
+ *OS << *I << ":\n";
+ }
+ }
+}
+
diff --git a/lib/Frontend/DiagChecker.cpp b/lib/Frontend/DiagChecker.cpp
new file mode 100644
index 0000000000..2eaf2fe62f
--- /dev/null
+++ b/lib/Frontend/DiagChecker.cpp
@@ -0,0 +1,301 @@
+//===--- DiagChecker.cpp - Diagnostic Checking Functions ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Process the input files and check that the diagnostic messages are expected.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Frontend/Utils.h"
+#include "clang/Frontend/TextDiagnosticBuffer.h"
+#include "clang/Sema/ParseAST.h"
+#include "clang/AST/ASTConsumer.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Lex/Preprocessor.h"
+using namespace clang;
+
+typedef TextDiagnosticBuffer::DiagList DiagList;
+typedef TextDiagnosticBuffer::const_iterator const_diag_iterator;
+
+static void EmitError(Preprocessor &PP, SourceLocation Pos, const char *String){
+ unsigned ID = PP.getDiagnostics().getCustomDiagID(Diagnostic::Error, String);
+ PP.Diag(Pos, ID);
+}
+
+
+// USING THE DIAGNOSTIC CHECKER:
+//
+// Indicating that a line expects an error or a warning is simple. Put a comment
+// on the line that has the diagnostic, use "expected-{error,warning}" to tag
+// if it's an expected error or warning, and place the expected text between {{
+// and }} markers. The full text doesn't have to be included, only enough to
+// ensure that the correct diagnostic was emitted.
+//
+// Here's an example:
+//
+// int A = B; // expected-error {{use of undeclared identifier 'B'}}
+//
+// You can place as many diagnostics on one line as you wish. To make the code
+// more readable, you can use slash-newline to separate out the diagnostics.
+//
+// The simple syntax above allows each specification to match exactly one error.
+// You can use the extended syntax to customize this. The extended syntax is
+// "expected-<type> <n> {{diag text}}", where <type> is one of "error",
+// "warning" or "note", and <n> is a positive integer. This allows the
+// diagnostic to appear as many times as specified. Example:
+//
+// void f(); // expected-note 2 {{previous declaration is here}}
+//
+
+/// FindDiagnostics - Go through the comment and see if it indicates expected
+/// diagnostics. If so, then put them in a diagnostic list.
+///
+static void FindDiagnostics(const char *CommentStart, unsigned CommentLen,
+ DiagList &ExpectedDiags,
+ Preprocessor &PP, SourceLocation Pos,
+ const char *ExpectedStr) {
+ const char *CommentEnd = CommentStart+CommentLen;
+ unsigned ExpectedStrLen = strlen(ExpectedStr);
+
+ // Find all expected-foo diagnostics in the string and add them to
+ // ExpectedDiags.
+ while (CommentStart != CommentEnd) {
+ CommentStart = std::find(CommentStart, CommentEnd, 'e');
+ if (unsigned(CommentEnd-CommentStart) < ExpectedStrLen) return;
+
+ // If this isn't expected-foo, ignore it.
+ if (memcmp(CommentStart, ExpectedStr, ExpectedStrLen)) {
+ ++CommentStart;
+ continue;
+ }
+
+ CommentStart += ExpectedStrLen;
+
+ // Skip whitespace.
+ while (CommentStart != CommentEnd &&
+ isspace(CommentStart[0]))
+ ++CommentStart;
+
+ // Default, if we find the '{' now, is 1 time.
+ int Times = 1;
+ int Temp = 0;
+ // In extended syntax, there could be a digit now.
+ while (CommentStart != CommentEnd &&
+ CommentStart[0] >= '0' && CommentStart[0] <= '9') {
+ Temp *= 10;
+ Temp += CommentStart[0] - '0';
+ ++CommentStart;
+ }
+ if (Temp > 0)
+ Times = Temp;
+
+ // Skip whitespace again.
+ while (CommentStart != CommentEnd &&
+ isspace(CommentStart[0]))
+ ++CommentStart;
+
+ // We should have a {{ now.
+ if (CommentEnd-CommentStart < 2 ||
+ CommentStart[0] != '{' || CommentStart[1] != '{') {
+ if (std::find(CommentStart, CommentEnd, '{') != CommentEnd)
+ EmitError(PP, Pos, "bogus characters before '{{' in expected string");
+ else
+ EmitError(PP, Pos, "cannot find start ('{{') of expected string");
+ return;
+ }
+ CommentStart += 2;
+
+ // Find the }}.
+ const char *ExpectedEnd = CommentStart;
+ while (1) {
+ ExpectedEnd = std::find(ExpectedEnd, CommentEnd, '}');
+ if (CommentEnd-ExpectedEnd < 2) {
+ EmitError(PP, Pos, "cannot find end ('}}') of expected string");
+ return;
+ }
+
+ if (ExpectedEnd[1] == '}')
+ break;
+
+ ++ExpectedEnd; // Skip over singular }'s
+ }
+
+ std::string Msg(CommentStart, ExpectedEnd);
+ std::string::size_type FindPos;
+ while ((FindPos = Msg.find("\\n")) != std::string::npos)
+ Msg.replace(FindPos, 2, "\n");
+    // Add it, possibly multiple times.
+ for (int i = 0; i < Times; ++i)
+ ExpectedDiags.push_back(std::make_pair(Pos, Msg));
+
+ CommentStart = ExpectedEnd;
+ }
+}
+
+/// FindExpectedDiags - Lex the main source file to find all of the
+/// expected errors and warnings.
+static void FindExpectedDiags(Preprocessor &PP,
+ DiagList &ExpectedErrors,
+ DiagList &ExpectedWarnings,
+ DiagList &ExpectedNotes) {
+ // Create a raw lexer to pull all the comments out of the main file. We don't
+ // want to look in #include'd headers for expected-error strings.
+ FileID FID = PP.getSourceManager().getMainFileID();
+
+ // Create a lexer to lex all the tokens of the main file in raw mode.
+ Lexer RawLex(FID, PP.getSourceManager(), PP.getLangOptions());
+
+  // Return comments as tokens; this is how we find expected diagnostics.
+ RawLex.SetCommentRetentionState(true);
+
+ Token Tok;
+ Tok.setKind(tok::comment);
+ while (Tok.isNot(tok::eof)) {
+ RawLex.Lex(Tok);
+ if (!Tok.is(tok::comment)) continue;
+
+ std::string Comment = PP.getSpelling(Tok);
+ if (Comment.empty()) continue;
+
+
+ // Find all expected errors.
+ FindDiagnostics(&Comment[0], Comment.size(), ExpectedErrors, PP,
+ Tok.getLocation(), "expected-error");
+
+ // Find all expected warnings.
+ FindDiagnostics(&Comment[0], Comment.size(), ExpectedWarnings, PP,
+ Tok.getLocation(), "expected-warning");
+
+ // Find all expected notes.
+ FindDiagnostics(&Comment[0]