14 files changed, 455 insertions, 89 deletions
diff --git a/include/clang/AST/ExternalASTSource.h b/include/clang/AST/ExternalASTSource.h
index 267b4838a4..ef09770933 100644
--- a/include/clang/AST/ExternalASTSource.h
+++ b/include/clang/AST/ExternalASTSource.h
@@ -7,7 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 //
-//  This file defines the ExternalASTSource interface, 
+//  This file defines the ExternalASTSource interface, which enables
+//  construction of AST nodes from some external source.
 //
 //===----------------------------------------------------------------------===//
 #ifndef LLVM_CLANG_AST_EXTERNAL_AST_SOURCE_H
@@ -22,6 +23,7 @@ namespace clang {
 class ASTConsumer;
 class Decl;
 class DeclContext;
+class ExternalSemaSource; // layering violation required for downcasting
 class Stmt;
 
 /// \brief The deserialized representation of a set of declarations
@@ -44,7 +46,15 @@ struct VisibleDeclaration {
 /// actual type and declaration nodes, and read parts of declaration
 /// contexts.
 class ExternalASTSource {
+  /// \brief Whether this AST source also provides information for
+  /// semantic analysis.
+  bool SemaSource;
+
+  friend class ExternalSemaSource;
+
 public:
+  ExternalASTSource() : SemaSource(false) { }
+
   virtual ~ExternalASTSource();
 
   /// \brief Resolve a type ID into a type, potentially building a new
diff --git a/include/clang/Basic/IdentifierTable.h b/include/clang/Basic/IdentifierTable.h
index d4f680494e..3156bbc4e9 100644
--- a/include/clang/Basic/IdentifierTable.h
+++ b/include/clang/Basic/IdentifierTable.h
@@ -229,7 +229,7 @@ private:
 };
 
 /// IdentifierInfoLookup - An abstract class used by IdentifierTable that
-///  provides an interface for for performing lookups from strings
+///  provides an interface for performing lookups from strings
 /// (const char *) to IdentiferInfo objects.
 class IdentifierInfoLookup {
 public:
@@ -260,6 +260,11 @@ public:
   IdentifierTable(const LangOptions &LangOpts,
                   IdentifierInfoLookup* externalLookup = 0);
   
+  /// \brief Set the external identifier lookup mechanism.
+  void setExternalIdentifierLookup(IdentifierInfoLookup *IILookup) {
+    ExternalLookup = IILookup;
+  }
+
   llvm::BumpPtrAllocator& getAllocator() {
     return HashTable.getAllocator();
   }
@@ -295,6 +300,34 @@ public:
     return *II;
   }
   
+  /// \brief Creates a new IdentifierInfo from the given string.
+  ///
+  /// This is a lower-level version of get() that requires that this
+  /// identifier not be known previously and that does not consult an
+  /// external source for identifiers. In particular, external
+  /// identifier sources can use this routine to build IdentifierInfo
+  /// nodes and then introduce additional information about those
+  /// identifiers.
+  IdentifierInfo &CreateIdentifierInfo(const char *NameStart, 
+                                       const char *NameEnd) {
+    llvm::StringMapEntry<IdentifierInfo*> &Entry =
+      HashTable.GetOrCreateValue(NameStart, NameEnd);
+    
+    IdentifierInfo *II = Entry.getValue();
+    assert(!II && "IdentifierInfo already exists");
+    
+    // No IdentifierInfo exists for this name yet; make a new one.
+    void *Mem = getAllocator().Allocate<IdentifierInfo>();
+    II = new (Mem) IdentifierInfo();
+    Entry.setValue(II);
+
+    // Make sure getName() knows how to find the IdentifierInfo
+    // contents.
+    II->Entry = &Entry;
+
+    return *II;
+  }
+
   IdentifierInfo &get(const char *Name) {
     return get(Name, Name+strlen(Name));
   }
@@ -304,14 +337,11 @@ public:
     return get(NameBytes, NameBytes+Name.size());
   }
 
-private:
   typedef HashTableTy::const_iterator iterator;
   typedef HashTableTy::const_iterator const_iterator;
   
   iterator begin() const { return HashTable.begin(); }
   iterator end() const   { return HashTable.end(); }
-public:
-  
   unsigned size() const { return HashTable.size(); }
   
   /// PrintStats - Print some statistics to stderr that indicate how well the
diff --git a/include/clang/Basic/OnDiskHashTable.h b/include/clang/Basic/OnDiskHashTable.h
index 631d497f1a..3caeb9ffd8 100644
--- a/include/clang/Basic/OnDiskHashTable.h
+++ b/include/clang/Basic/OnDiskHashTable.h
@@ -242,6 +242,8 @@ class OnDiskChainedHashTable {
   const unsigned NumEntries;
   const unsigned char* const Buckets;
   const unsigned char* const Base;
+  Info InfoObj;
+
 public:
   typedef typename Info::internal_key_type internal_key_type;
   typedef typename Info::external_key_type external_key_type;
@@ -249,9 +251,10 @@ public:
   
   OnDiskChainedHashTable(unsigned numBuckets, unsigned numEntries,
                          const unsigned char* buckets,
-                         const unsigned char* base)
+                         const unsigned char* base,
+                         const Info &InfoObj = Info())
     : NumBuckets(numBuckets), NumEntries(numEntries),
-      Buckets(buckets), Base(base) {        
+      Buckets(buckets), Base(base), InfoObj(InfoObj) {
         assert((reinterpret_cast<uintptr_t>(buckets) & 0x3) == 0 &&
                "'buckets' must have a 4-byte alignment");
       }
@@ -267,22 +270,27 @@ public:
     internal_key_type key;
     const unsigned char* const data;
     const unsigned len;
+    Info *InfoObj;
   public:
     iterator() : data(0), len(0) {}
-    iterator(const internal_key_type k, const unsigned char* d, unsigned l)
-      : key(k), data(d), len(l) {}
+    iterator(const internal_key_type k, const unsigned char* d, unsigned l,
+             Info *InfoObj)
+      : key(k), data(d), len(l), InfoObj(InfoObj) {}
     
-    data_type operator*() const { return Info::ReadData(key, data, len); }    
+    data_type operator*() const { return InfoObj->ReadData(key, data, len); }    
     bool operator==(const iterator& X) const { return X.data == data; }    
     bool operator!=(const iterator& X) const { return X.data != data; }
   };    
   
-  iterator find(const external_key_type& eKey) {
+  iterator find(const external_key_type& eKey, Info *InfoPtr = 0) {
+    if (!InfoPtr)
+      InfoPtr = &InfoObj;
+
     using namespace io;
     const internal_key_type& iKey = Info::GetInternalKey(eKey);
     unsigned key_hash = Info::ComputeHash(iKey);
     
-    // Each bucket is just a 32-bit offset into the PTH file.
+    // Each bucket is just a 32-bit offset into the hash table file.
     unsigned idx = key_hash & (NumBuckets - 1);
     const unsigned char* Bucket = Buckets + sizeof(uint32_t)*idx;
     
@@ -319,7 +327,7 @@ public:
       }
       
       // The key matches!
-      return iterator(X, Items + L.first, L.second);
+      return iterator(X, Items + L.first, L.second, InfoPtr);
     }
     
     return iterator();
@@ -329,7 +337,8 @@ public:
   
   
   static OnDiskChainedHashTable* Create(const unsigned char* buckets,
-                                        const unsigned char* const base) {
+                                        const unsigned char* const base,
+                                        const Info &InfoObj = Info()) {
     using namespace io;
     assert(buckets > base);
     assert((reinterpret_cast<uintptr_t>(buckets) & 0x3) == 0 &&
@@ -338,7 +347,7 @@ public:
     unsigned numBuckets = ReadLE32(buckets);
     unsigned numEntries = ReadLE32(buckets);
     return new OnDiskChainedHashTable<Info>(numBuckets, numEntries, buckets,
-                                            base);
+                                            base, InfoObj);
   }  
 };
 
diff --git a/include/clang/Frontend/PCHReader.h b/include/clang/Frontend/PCHReader.h
index 6ee549abf5..3af147f7a9 100644
--- a/include/clang/Frontend/PCHReader.h
+++ b/include/clang/Frontend/PCHReader.h
@@ -15,9 +15,10 @@
 
 #include "clang/Frontend/PCHBitCodes.h"
 #include "clang/AST/DeclarationName.h"
-#include "clang/AST/ExternalASTSource.h"
+#include "clang/Sema/ExternalSemaSource.h"
 #include "clang/AST/Type.h"
 #include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/IdentifierTable.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/APSInt.h"
@@ -44,7 +45,9 @@ class Decl;
 class DeclContext;
 class GotoStmt;
 class LabelStmt;
+class NamedDecl;
 class Preprocessor;
+class Sema;
 class SwitchCase;
 
 /// \brief Reads a precompiled head containing the contents of a
@@ -59,11 +62,15 @@ class SwitchCase;
 /// The PCH reader provides lazy de-serialization of declarations, as
 /// required when traversing the AST. Only those AST nodes that are
 /// actually required will be de-serialized.
-class PCHReader : public ExternalASTSource {
+class PCHReader : public ExternalSemaSource, public IdentifierInfoLookup {
 public:
   enum PCHReadResult { Success, Failure, IgnorePCH };
 
 private:
+  /// \brief The semantic analysis object that will be processing the
+  /// PCH file and the translation unit that uses it.
+  Sema *SemaObj;
+
   /// \brief The preprocessor that will be loading the source file.
   Preprocessor &PP;
 
@@ -116,8 +123,14 @@ private:
   /// DeclContext.
   DeclContextOffsetsMap DeclContextOffsets;
 
-  /// \brief String data for the identifiers in the PCH file.
-  const char *IdentifierTable;
+  /// \brief Actual data for the on-disk hash table.
+  ///
+  /// FIXME: This will eventually go away.
+  const char *IdentifierTableData;
+
+  /// \brief A pointer to an on-disk hash table of opaque type
+  /// IdentifierHashTable.
+  void *IdentifierLookupTable;
 
   /// \brief String data for identifiers, indexed by the identifier ID
   /// minus one.
@@ -158,10 +171,10 @@ private:
   /// in the PCH file.
   unsigned TotalNumStatements;
 
-  /// \brief 
+  /// \brief IDs of the special types, indexed by pch::SPECIAL_TYPE_* values.
   llvm::SmallVector<uint64_t, 4> SpecialTypes;
 
-  PCHReadResult ReadPCHBlock();
+  PCHReadResult ReadPCHBlock(uint64_t &PreprocessorBlockOffset);
   bool CheckPredefinesBuffer(const char *PCHPredef, 
                              unsigned PCHPredefLen,
                              FileID PCHBufferID);
@@ -179,8 +192,9 @@ private:
 public:
   typedef llvm::SmallVector<uint64_t, 64> RecordData;
 
-  PCHReader(Preprocessor &PP, ASTContext &Context) 
-    : PP(PP), Context(Context), IdentifierTable(0), NumStatementsRead(0) { }
+  explicit PCHReader(Preprocessor &PP, ASTContext &Context) 
+    : SemaObj(0), PP(PP), Context(Context), IdentifierTableData(0),
+      IdentifierLookupTable(0), NumStatementsRead(0) { }
 
   ~PCHReader() {}
 
@@ -246,6 +260,23 @@ public:
   /// \brief Print some statistics about PCH usage.
   virtual void PrintStats();
 
+  /// \brief Initialize the semantic source with the Sema instance
+  /// being used to perform semantic analysis on the abstract syntax
+  /// tree.
+  virtual void InitializeSema(Sema &S);
+
+  /// \brief Retrieve the IdentifierInfo for the named identifier.
+  ///
+  /// This routine builds a new IdentifierInfo for the given
+  /// identifier. If any declarations with this name are visible from
+  /// translation unit scope, their declarations will be deserialized
+  /// and introduced into the declaration chain of the
+  /// identifier. FIXME: if this identifier names a macro, deserialize
+  /// the macro.
+  virtual IdentifierInfo* get(const char *NameStart, const char *NameEnd);
+
+  void SetIdentifierInfo(unsigned ID, const IdentifierInfo *II);
+
   /// \brief Report a diagnostic.
   DiagnosticBuilder Diag(unsigned DiagID);
 
@@ -284,9 +315,22 @@ public:
   /// supplements.
   ASTContext &getContext() { return Context; }
 
+  // FIXME: temporary hack to store declarations that we deserialized
+  // before we had access to the Sema object.
+  llvm::SmallVector<NamedDecl *, 16> TUDecls;
+
+  /// \brief Retrieve the semantic analysis object used to analyze the
+  /// translation unit in which the precompiled header is being
+  /// imported.
+  Sema *getSema() { return SemaObj; }
+
   /// \brief Retrieve the stream that this PCH reader is reading from.
   llvm::BitstreamReader &getStream() { return Stream; }
 
+  /// \brief Retrieve the identifier table associated with the
+  /// preprocessor.
+  IdentifierTable &getIdentifierTable();
+
   /// \brief Record that the given ID maps to the given switch-case
   /// statement.
   void RecordSwitchCaseID(SwitchCase *SC, unsigned ID);
diff --git a/include/clang/Sema/ExternalSemaSource.h b/include/clang/Sema/ExternalSemaSource.h
new file mode 100644
index 0000000000..1c216e4b86
--- /dev/null
+++ b/include/clang/Sema/ExternalSemaSource.h
@@ -0,0 +1,45 @@
+//===--- ExternalSemaSource.h - External Sema Interface ---------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file defines the ExternalSemaSource interface.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CLANG_SEMA_EXTERNAL_SEMA_SOURCE_H
+#define LLVM_CLANG_SEMA_EXTERNAL_SEMA_SOURCE_H
+
+#include "clang/AST/ExternalASTSource.h"
+
+namespace clang {
+
+class Sema;
+
+/// \brief An abstract interface that should be implemented by
+/// external AST sources that also provide information for semantic
+/// analysis.
+class ExternalSemaSource : public ExternalASTSource {
+public:
+  ExternalSemaSource() {
+    ExternalASTSource::SemaSource = true;
+  }
+
+  /// \brief Initialize the semantic source with the Sema instance
+  /// being used to perform semantic analysis on the abstract syntax
+  /// tree.
+  virtual void InitializeSema(Sema &S) {}
+  
+  // isa/cast/dyn_cast support
+  static bool classof(const ExternalASTSource *Source) { 
+    return Source->SemaSource;
+  }
+  static bool classof(const ExternalSemaSource *) { return true; }
+};
+
+} // end namespace clang
+
+#endif
diff --git a/include/clang/Sema/SemaConsumer.h b/include/clang/Sema/SemaConsumer.h
index 25d4253390..e821947035 100644
--- a/include/clang/Sema/SemaConsumer.h
+++ b/include/clang/Sema/SemaConsumer.h
@@ -25,7 +25,7 @@ namespace clang {
   /// analysis of the entities in those ASTs.
   class SemaConsumer : public ASTConsumer {
   public:
-    explicit SemaConsumer() {
+    SemaConsumer() {
       ASTConsumer::SemaConsumer = true;
     }
 
diff --git a/lib/Frontend/PCHReader.cpp b/lib/Frontend/PCHReader.cpp
index abdb24f0ca..00075c88b6 100644
--- a/lib/Frontend/PCHReader.cpp
+++ b/lib/Frontend/PCHReader.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 #include "clang/Frontend/PCHReader.h"
 #include "clang/Frontend/FrontendDiagnostic.h"
+#include "../Sema/Sema.h" // FIXME: move Sema headers elsewhere
 #include "clang/AST/ASTConsumer.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Decl.h"
@@ -22,6 +23,7 @@
 #include "clang/AST/Type.h"
 #include "clang/Lex/MacroInfo.h"
 #include "clang/Lex/Preprocessor.h"
+#include "clang/Basic/OnDiskHashTable.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/SourceManagerInternals.h"
 #include "clang/Basic/FileManager.h"
@@ -994,6 +996,111 @@ unsigned PCHStmtReader::VisitBlockDeclRefExpr(BlockDeclRefExpr *E) {
   return 0;
 }
 
+//===----------------------------------------------------------------------===//
+// PCH reader implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+class VISIBILITY_HIDDEN PCHIdentifierLookupTrait {
+  PCHReader &Reader;
+
+  // If we know the IdentifierInfo in advance, it is here and we will
+  // not build a new one. Used when deserializing information about an
+  // identifier that was constructed before the PCH file was read.
+  IdentifierInfo *KnownII;
+
+public:
+  typedef IdentifierInfo * data_type;
+
+  typedef const std::pair<const char*, unsigned> external_key_type;
+
+  typedef external_key_type internal_key_type;
+
+  explicit PCHIdentifierLookupTrait(PCHReader &Reader, IdentifierInfo *II = 0) 
+    : Reader(Reader), KnownII(II) { }
+  
+  static bool EqualKey(const internal_key_type& a,
+                       const internal_key_type& b) {
+    return (a.second == b.second) ? memcmp(a.first, b.first, a.second) == 0
+                                  : false;
+  }
+  
+  static unsigned ComputeHash(const internal_key_type& a) {
+    return BernsteinHash(a.first, a.second);
+  }
+  
+  // This hopefully will just get inlined and removed by the optimizer.
+  static const internal_key_type&
+  GetInternalKey(const external_key_type& x) { return x; }
+  
+  static std::pair<unsigned, unsigned>
+  ReadKeyDataLength(const unsigned char*& d) {
+    using namespace clang::io;
+    unsigned KeyLen = ReadUnalignedLE16(d);
+    unsigned DataLen = ReadUnalignedLE16(d);
+    return std::make_pair(KeyLen, DataLen);
+  }
+    
+  static std::pair<const char*, unsigned>
+  ReadKey(const unsigned char* d, unsigned n) {
+    assert(n >= 2 && d[n-1] == '\0');
+    return std::make_pair((const char*) d, n-1);
+  }
+    
+  IdentifierInfo *ReadData(const internal_key_type& k, 
+                           const unsigned char* d,
+                           unsigned DataLen) {
+    using namespace clang::io;
+    uint32_t Bits = ReadUnalignedLE32(d); // FIXME: use these?
+    (void)Bits;
+    pch::IdentID ID = ReadUnalignedLE32(d);
+    DataLen -= 8;
+
+    // Build the IdentifierInfo itself and link the identifier ID with
+    // the new IdentifierInfo.
+    IdentifierInfo *II = KnownII;
+    if (!II)
+      II = &Reader.getIdentifierTable().CreateIdentifierInfo(
+                                                 k.first, k.first + k.second);
+    Reader.SetIdentifierInfo(ID, II);
+
+    // FIXME: If this identifier is a macro, deserialize the macro
+    // definition now.
+
+    // Read all of the declarations visible at global scope with this
+    // name.
+    Sema *SemaObj = Reader.getSema();
+    while (DataLen > 0) {
+      NamedDecl *D = cast<NamedDecl>(Reader.GetDecl(ReadUnalignedLE32(d)));
+
+      if (SemaObj) {
+        // Introduce this declaration into the translation-unit scope
+        // and add it to the declaration chain for this identifier, so
+        // that (unqualified) name lookup will find it.
+        SemaObj->TUScope->AddDecl(Action::DeclPtrTy::make(D));
+        SemaObj->IdResolver.AddDeclToIdentifierChain(II, D);
+      } else {
+        // Queue this declaration so that it will be added to the
+        // translation unit scope and identifier's declaration chain
+        // once a Sema object is known.
+        // FIXME: This is a temporary hack. It will go away once we have
+        // lazy deserialization of macros.
+        Reader.TUDecls.push_back(D);
+      }
+
+      DataLen -= 4;
+    }
+    return II;
+  }
+};
+  
+} // end anonymous namespace  
+
+/// \brief The on-disk hash table used to contain information about
+/// all of the identifiers in the program.
+typedef OnDiskChainedHashTable<PCHIdentifierLookupTrait> 
+  PCHIdentifierLookupTable;
+
 // FIXME: use the diagnostics machinery
 static bool Error(const char *Str) {
   std::fprintf(stderr, "%s\n", Str);
@@ -1314,30 +1421,18 @@ bool PCHReader::ReadPreprocessorBlock() {
   }
 }
 
-PCHReader::PCHReadResult PCHReader::ReadPCHBlock() {
+PCHReader::PCHReadResult 
+PCHReader::ReadPCHBlock(uint64_t &PreprocessorBlockOffset) {
   if (Stream.EnterSubBlock(pch::PCH_BLOCK_ID)) {
     Error("Malformed block record");
     return Failure;
   }
 
-  uint64_t PreprocessorBlockBit = 0;
-
   // Read all of the records and blocks for the PCH file.
   RecordData Record;
   while (!Stream.AtEndOfStream()) {
     unsigned Code = Stream.ReadCode();
     if (Code == llvm::bitc::END_BLOCK) {
-      // If we saw the preprocessor block, read it now.
-      if (PreprocessorBlockBit) {
-        uint64_t SavedPos = Stream.GetCurrentBitNo();
-        Stream.JumpToBit(PreprocessorBlockBit);
-        if (ReadPreprocessorBlock()) {
-          Error("Malformed preprocessor block");
-          return Failure;
-        }
-        Stream.JumpToBit(SavedPos);
-      }        
-      
       if (Stream.ReadBlockEnd()) {
         Error("Error at end of module block");
         return Failure;
@@ -1360,11 +1455,11 @@ PCHReader::PCHReadResult PCHReader::ReadPCHBlock() {
       case pch::PREPROCESSOR_BLOCK_ID:
         // Skip the preprocessor block for now, but remember where it is.  We
         // want to read it in after the identifier table.
-        if (PreprocessorBlockBit) {
+        if (PreprocessorBlockOffset) {
           Error("Multiple preprocessor blocks found.");
           return Failure;
         }
-        PreprocessorBlockBit = Stream.GetCurrentBitNo();
+        PreprocessorBlockOffset = Stream.GetCurrentBitNo();
         if (Stream.SkipBlock()) {
           Error("Malformed block record");
           return Failure;
@@ -1437,7 +1532,15 @@ PCHReader::PCHReadResult PCHReader::ReadPCHBlock() {
     }
 
     case pch::IDENTIFIER_TABLE:
-      IdentifierTable = BlobStart;
+      IdentifierTableData = BlobStart;
+      IdentifierLookupTable 
+        = PCHIdentifierLookupTable::Create(
+                        (const unsigned char *)IdentifierTableData + Record[0],
+                        (const unsigned char *)IdentifierTableData, 
+                        PCHIdentifierLookupTrait(*this));
+      // FIXME: What about any identifiers already placed into the
+      // identifier table? Should we load decls with those names now?
+      PP.getIdentifierTable().setExternalIdentifierLookup(this);
       break;
 
     case pch::IDENTIFIER_OFFSET:
@@ -1479,6 +1582,23 @@ PCHReader::PCHReadResult PCHReader::ReadPCHBlock() {
   return Failure;
 }
 
+namespace {
+  /// \brief Helper class that saves the current stream position and
+  /// then restores it when destroyed.
+  struct VISIBILITY_HIDDEN SavedStreamPosition {
+    explicit SavedStreamPosition(llvm::BitstreamReader &Stream)
+      : Stream(Stream), Offset(Stream.GetCurrentBitNo()) { }
+
+    ~SavedStreamPosition() {
+      Stream.JumpToBit(Offset);
+    }
+
+  private:
+    llvm::BitstreamReader &Stream;
+    uint64_t Offset;
+  };
+}
+
 PCHReader::PCHReadResult PCHReader::ReadPCH(const std::string &FileName) {
   // Set the PCH file name.
   this->FileName = FileName;
@@ -1506,6 +1626,7 @@ PCHReader::PCHReadResult PCHReader::ReadPCH(const std::string &FileName) {
 
   // We expect a number of well-defined blocks, though we don't necessarily
   // need to understand them all.
+  uint64_t PreprocessorBlockOffset = 0;
   while (!Stream.AtEndOfStream()) {
     unsigned Code = Stream.ReadCode();
     
@@ -1515,7 +1636,7 @@ PCHReader::PCHReadResult PCHReader::ReadPCH(const std::string &FileName) {
     }
 
     unsigned BlockID = Stream.ReadSubBlockID();
-    
+
     // We only know the PCH subblock ID.
     switch (BlockID) {
     case llvm::bitc::BLOCKINFO_BLOCK_ID:
@@ -1525,7 +1646,7 @@ PCHReader::PCHReadResult PCHReader::ReadPCH(const std::string &FileName) {
       }
       break;
     case pch::PCH_BLOCK_ID:
-      switch (ReadPCHBlock()) {
+      switch (ReadPCHBlock(PreprocessorBlockOffset)) {
       case Success:
         break;
 
@@ -1551,28 +1672,54 @@ PCHReader::PCHReadResult PCHReader::ReadPCH(const std::string &FileName) {
   // Load the translation unit declaration
   ReadDeclRecord(DeclOffsets[0], 0);
 
+  // Initialization of builtins and library builtins occurs before the
+  // PCH file is read, so there may be some identifiers that were
+  // loaded into the IdentifierTable before we intercepted the
+  // creation of identifiers. Iterate through the list of known
+  // identifiers and determine whether we have to establish
+  // preprocessor definitions or top-level identifier declaration
+  // chains for those identifiers.
+  //
+  // We copy the IdentifierInfo pointers to a small vector first,
+  // since de-serializing declarations or macro definitions can add
+  // new entries into the identifier table, invalidating the
+  // iterators.
+  llvm::SmallVector<IdentifierInfo *, 128> Identifiers;
+  for (IdentifierTable::iterator Id = PP.getIdentifierTable().begin(),
+                              IdEnd = PP.getIdentifierTable().end();
+       Id != IdEnd; ++Id)
+    Identifiers.push_back(Id->second);
+  PCHIdentifierLookupTable *IdTable 
+    = (PCHIdentifierLookupTable *)IdentifierLookupTable;
+  for (unsigned I = 0, N = Identifiers.size(); I != N; ++I) {
+    IdentifierInfo *II = Identifiers[I];
+    // Look in the on-disk hash table for an entry for this identifier.
+    PCHIdentifierLookupTrait Info(*this, II);
+    std::pair<const char*, unsigned> Key(II->getName(), II->getLength());
+    PCHIdentifierLookupTable::iterator Pos = IdTable->find(Key, &Info);
+    if (Pos == IdTable->end())
+      continue;
+
+    // Dereferencing the iterator has the effect of populating the
+    // IdentifierInfo node with the various declarations it needs.
+    (void)*Pos;
+  }
+
   // Load the special types.
   Context.setBuiltinVaListType(
     GetType(SpecialTypes[pch::SPECIAL_TYPE_BUILTIN_VA_LIST]));
 
-  return Success;
-}
-
-namespace {
-  /// \brief Helper class that saves the current stream position and
-  /// then restores it when destroyed.
-  struct VISIBILITY_HIDDEN SavedStreamPosition {
-    explicit SavedStreamPosition(llvm::BitstreamReader &Stream)
-      : Stream(Stream), Offset(Stream.GetCurrentBitNo()) { }
-
-    ~SavedStreamPosition() {
-      Stream.JumpToBit(Offset);
+  // If we saw the preprocessor block, read it now.
+  if (PreprocessorBlockOffset) {
+    SavedStreamPosition SavedPos(Stream);
+    Stream.JumpToBit(PreprocessorBlockOffset);
+    if (ReadPreprocessorBlock()) {
+      Error("Malformed preprocessor block");
+      return Failure;
     }
+  }
 
-  private:
-    llvm::BitstreamReader &Stream;
-    uint64_t Offset;
-  };
+  return Success;
 }
 
 /// \brief Parse the record that corresponds to a LangOptions data
@@ -2021,7 +2168,7 @@ Decl *PCHReader::ReadDeclRecord(uint64_t Offset, unsigned Index) {
   }
   }
 
-  assert(D && "Unknown declaration creating PCH file");
+  assert(D && "Unknown declaration reading PCH file");
   if (D) {
     LoadedDecl(Index, D);
     Reader.Visit(D);
@@ -2220,11 +2367,44 @@ void PCHReader::PrintStats() {
   std::fprintf(stderr, "\n");
 }
 
+void PCHReader::InitializeSema(Sema &S) {
+  SemaObj = &S;
+ 
+  // FIXME: this makes sure any declarations that were deserialized
+  // "too early" still get added to the identifier's declaration
+  // chains.
+  for (unsigned I = 0, N = TUDecls.size(); I != N; ++I) {
+    SemaObj->TUScope->AddDecl(Action::DeclPtrTy::make(TUDecls[I]));
+    SemaObj->IdResolver.AddDecl(TUDecls[I]);
+  }
+  TUDecls.clear();
+}
+
+IdentifierInfo* PCHReader::get(const char *NameStart, const char *NameEnd) {
+  // Try to find this name within our on-disk hash table
+  PCHIdentifierLookupTable *IdTable 
+    = (PCHIdentifierLookupTable *)IdentifierLookupTable;
+  std::pair<const char*, unsigned> Key(NameStart, NameEnd - NameStart);
+  PCHIdentifierLookupTable::iterator Pos = IdTable->find(Key);
+  if (Pos == IdTable->end())
+    return 0;
+
+  // Dereferencing the iterator has the effect of building the
+  // IdentifierInfo node and populating it with the various
+  // declarations it needs.
+  return *Pos;
+}
+
+void PCHReader::SetIdentifierInfo(unsigned ID, const IdentifierInfo *II) {
+  assert(ID && "Non-zero identifier ID required");
+  IdentifierData[ID - 1] = reinterpret_cast<uint64_t>(II);
+}
+
 IdentifierInfo *PCHReader::DecodeIdentifierInfo(unsigned ID) {
   if (ID == 0)
     return 0;
   
-  if (!IdentifierTable || IdentifierData.empty()) {
+  if (!IdentifierTableData || IdentifierData.empty()) {
     Error("No identifier table in PCH file");
     return 0;
   }
@@ -2232,8 +2412,7 @@ IdentifierInfo *PCHReader::DecodeIdentifierInfo(unsigned ID) {
   if (IdentifierData[ID - 1] & 0x01) {
     uint64_t Offset = IdentifierData[ID - 1] >> 1;
     IdentifierData[ID - 1] = reinterpret_cast<uint64_t>(
-                               &Context.Idents.get(IdentifierTable + Offset));
-    // FIXME: also read the contents of the IdentifierInfo.
+                               &Context.Idents.get(IdentifierTableData + Offset));
   }
   
   return reinterpret_cast<IdentifierInfo *>(IdentifierData[ID - 1]);
@@ -2724,6 +2903,12 @@ DiagnosticBuilder PCHReader::Diag(SourceLocation Loc, unsigned DiagID) {
                                     DiagID);
 }
 
+/// \brief Retrieve the identifier table associated with the
+/// preprocessor.
+IdentifierTable &PCHReader::getIdentifierTable() {
+  return PP.getIdentifierTable();
+}
+
 /// \brief Record that the given ID maps to the given switch-case
 /// statement.
 void PCHReader::RecordSwitchCaseID(SwitchCase *SC, unsigned ID) {
diff --git a/lib/Frontend/PCHWriter.cpp b/lib/Frontend/PCHWriter.cpp
index 16eaf982fe..993ba9266f 100644
--- a/lib/Frontend/PCHWriter.cpp
+++ b/lib/Frontend/PCHWriter.cpp
@@ -1421,7 +1421,7 @@ void PCHWriter::WriteSourceManagerBlock(SourceManager &SourceMgr) {
 ///
 void PCHWriter::WritePreprocessor(const Preprocessor &PP) {
   // Enter the preprocessor block.
-  Stream.EnterSubblock(pch::PREPROCESSOR_BLOCK_ID, 3);
+  Stream.EnterSubblock(pch::PREPROCESSOR_BLOCK_ID, 2);
   
   // If the PCH file contains __DATE__ or __TIME__ emit a warning about this.
   // FIXME: use diagnostics subsystem for localization etc.
@@ -1732,13 +1732,13 @@ public:
                       pch::IdentID ID) {
     unsigned KeyLen = strlen(II->getName()) + 1;
     clang::io::Emit16(Out, KeyLen);
-    unsigned DataLen = 4 + 4 + 2; // 4 bytes for token ID, builtin, flags
-                                  // 4 bytes for the persistent ID
-                                  // 2 bytes for the length of the decl chain
+    unsigned DataLen = 4 + 4; // 4 bytes for token ID, builtin, flags
+                              // 4 bytes for the persistent ID
     for (IdentifierResolver::iterator D = IdentifierResolver::begin(II),
                                    DEnd = IdentifierResolver::end();
          D != DEnd; ++D)
       DataLen += sizeof(pch::DeclID);
+    clang::io::Emit16(Out, DataLen);
     return std::make_pair(KeyLen, DataLen);
   }
   
@@ -1762,15 +1762,18 @@ public:
     clang::io::Emit32(Out, Bits);
     clang::io::Emit32(Out, ID);
 
-    llvm::SmallVector<pch::DeclID, 8> Decls;
-    for (IdentifierResolver::iterator D = IdentifierResolver::begin(II),
-                                   DEnd = IdentifierResolver::end();
+    // Emit the declaration IDs in reverse order, because the
+    // IdentifierResolver provides the declarations as they would be
+    // visible (e.g., the function "stat" would come before the struct
+    // "stat"), but IdentifierResolver::AddDeclToIdentifierChain()
+    // adds declarations to the end of the list (so we need to see the
+    // struct "stat" before the function "stat").
+    llvm::SmallVector<Decl *, 16> Decls(IdentifierResolver::begin(II), 
+                                        IdentifierResolver::end());
+    for (llvm::SmallVector<Decl *, 16>::reverse_iterator D = Decls.rbegin(),
+                                                      DEnd = Decls.rend();
          D != DEnd; ++D)
-      Decls.push_back(Writer.getDeclID(*D));
-
-    clang::io::Emit16(Out, Decls.size());
-    for (unsigned I = 0; I < Decls.size(); ++I)
-      clang::io::Emit32(Out, Decls[I]);
+      clang::io::Emit32(Out, Writer.getDeclID(*D));
   }
 };
 } // end anonymous namespace
@@ -1799,21 +1802,24 @@ void PCHWriter::WriteIdentifierTable() {
 
     // Create the on-disk hash table in a buffer.
     llvm::SmallVector<char, 4096> IdentifierTable; 
+    uint32_t BucketOffset;
     {
       PCHIdentifierTableTrait Trait(*this);
       llvm::raw_svector_ostream Out(IdentifierTable);
-      Generator.Emit(Out, Trait);
+      BucketOffset = Generator.Emit(Out, Trait);
     }
 
     // Create a blob abbreviation
     BitCodeAbbrev *Abbrev = new BitCodeAbbrev();
     Abbrev->Add(BitCodeAbbrevOp(pch::IDENTIFIER_TABLE));
+    Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
     Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
     unsigned IDTableAbbrev = Stream.EmitAbbrev(Abbrev);
 
     // Write the identifier table
     RecordData Record;
     Record.push_back(pch::IDENTIFIER_TABLE);
+    Record.push_back(BucketOffset);
     Stream.EmitRecordWithBlob(IDTableAbbrev, Record, 
                               &IdentifierTable.front(), 
                               IdentifierTable.size());
diff --git a/lib/Sema/IdentifierResolver.cpp b/lib/Sema/IdentifierResolver.cpp
index c31435b677..ceab859c90 100644
--- a/lib/Sema/IdentifierResolver.cpp
+++ b/lib/Sema/IdentifierResolver.cpp
@@ -243,6 +243,28 @@ IdentifierResolver::begin(DeclarationName Name) {
   return end();
 }
 
+void IdentifierResolver::AddDeclToIdentifierChain(IdentifierInfo *II, 
+                                                  NamedDecl *D) {
+  void *Ptr = II->getFETokenInfo<void>();
+
+  if (!Ptr) {
+    II->setFETokenInfo(D);
+