aboutsummaryrefslogtreecommitdiff
path: root/lib/Lex/Preprocessor.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Lex/Preprocessor.cpp')
-rw-r--r--lib/Lex/Preprocessor.cpp60
1 files changed, 56 insertions, 4 deletions
diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp
index c01019cf43..b933a5fd75 100644
--- a/lib/Lex/Preprocessor.cpp
+++ b/lib/Lex/Preprocessor.cpp
@@ -27,6 +27,7 @@
#include "clang/Lex/Preprocessor.h"
#include "MacroArgs.h"
+#include "clang/Basic/ConvertUTF.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TargetInfo.h"
@@ -43,6 +44,8 @@
#include "clang/Lex/ScratchBuffer.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/Capacity.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
@@ -396,7 +399,7 @@ StringRef Preprocessor::getSpelling(const Token &Tok,
SmallVectorImpl<char> &Buffer,
bool *Invalid) const {
// NOTE: this has to be checked *before* testing for an IdentifierInfo.
- if (Tok.isNot(tok::raw_identifier)) {
+ if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) {
// Try the fast path.
if (const IdentifierInfo *II = Tok.getIdentifierInfo())
return II->getName();
@@ -494,6 +497,48 @@ void Preprocessor::EndSourceFile() {
// Lexer Event Handling.
//===----------------------------------------------------------------------===//
+static void appendCodePoint(unsigned Codepoint,
+ llvm::SmallVectorImpl<char> &Str) {
+ char ResultBuf[4];
+ char *ResultPtr = ResultBuf;
+ bool Res = ConvertCodePointToUTF8(Codepoint, ResultPtr);
+ (void)Res;
+ assert(Res && "Unexpected conversion failure");
+ Str.append(ResultBuf, ResultPtr);
+}
+
+static void expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input) {
+ for (StringRef::iterator I = Input.begin(), E = Input.end(); I != E; ++I) {
+ if (*I != '\\') {
+ Buf.push_back(*I);
+ continue;
+ }
+
+ ++I;
+ assert(*I == 'u' || *I == 'U');
+
+ unsigned NumHexDigits;
+ if (*I == 'u')
+ NumHexDigits = 4;
+ else
+ NumHexDigits = 8;
+
+ assert(I + NumHexDigits <= E);
+
+ uint32_t CodePoint = 0;
+ for (++I; NumHexDigits != 0; ++I, --NumHexDigits) {
+ unsigned Value = llvm::hexDigitValue(*I);
+ assert(Value != -1U);
+
+ CodePoint <<= 4;
+ CodePoint += Value;
+ }
+
+ appendCodePoint(CodePoint, Buf);
+ --I;
+ }
+}
+
/// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
/// identifier information for the token and install it into the token,
/// updating the token kind accordingly.
@@ -502,15 +547,22 @@ IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
// Look up this token, see if it is a macro, or if it is a language keyword.
IdentifierInfo *II;
- if (!Identifier.needsCleaning()) {
+ if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
// No cleaning needed, just use the characters from the lexed buffer.
II = getIdentifierInfo(StringRef(Identifier.getRawIdentifierData(),
- Identifier.getLength()));
+ Identifier.getLength()));
} else {
// Cleaning needed, alloca a buffer, clean into it, then use the buffer.
SmallString<64> IdentifierBuffer;
StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
- II = getIdentifierInfo(CleanedStr);
+
+ if (Identifier.hasUCN()) {
+ SmallString<64> UCNIdentifierBuffer;
+ expandUCNs(UCNIdentifierBuffer, CleanedStr);
+ II = getIdentifierInfo(UCNIdentifierBuffer);
+ } else {
+ II = getIdentifierInfo(CleanedStr);
+ }
}
// Update the token info (identifier info and appropriate token kind).