aboutsummaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/Lex/Lexer.cpp37
-rw-r--r--lib/Lex/LiteralSupport.cpp57
2 files changed, 88 insertions, 6 deletions
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index c9f73764c9..2b24d1cc75 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -1078,6 +1078,12 @@ static void InitCharacterInfo() {
}
+/// isIdentifierHead - Return true if this is the first character of an
+/// identifier, which is [a-zA-Z_].
+static inline bool isIdentifierHead(unsigned char c) {
+ return (CharInfo[c] & (CHAR_LETTER|CHAR_UNDER)) ? true : false;
+}
+
/// isIdentifierBody - Return true if this is the body character of an
/// identifier, which is [a-zA-Z0-9_].
static inline bool isIdentifierBody(unsigned char c) {
@@ -1543,7 +1549,7 @@ void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
unsigned Size;
char C = getCharAndSize(CurPtr, Size);
char PrevCh = 0;
- while (isNumberBody(C)) { // FIXME: UCNs?
+ while (isNumberBody(C)) { // FIXME: UCNs.
CurPtr = ConsumeChar(CurPtr, Size, Result);
PrevCh = C;
C = getCharAndSize(CurPtr, Size);
@@ -1567,6 +1573,23 @@ void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
Result.setLiteralData(TokStart);
}
+/// LexUDSuffix - Lex the ud-suffix production for user-defined literal suffixes
+/// in C++11.
+const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr) {
+ assert(getFeatures().CPlusPlus0x && "ud-suffix only exists in C++11");
+
+ // Maximally munch an identifier. FIXME: UCNs.
+ unsigned Size;
+ char C = getCharAndSize(CurPtr, Size);
+ if (isIdentifierHead(C)) {
+ do {
+ CurPtr = ConsumeChar(CurPtr, Size, Result);
+ C = getCharAndSize(CurPtr, Size);
+ } while (isIdentifierBody(C));
+ }
+ return CurPtr;
+}
+
/// LexStringLiteral - Lex the remainder of a string literal, after having lexed
/// either " or L" or u8" or u" or U".
void Lexer::LexStringLiteral(Token &Result, const char *CurPtr,
@@ -1606,6 +1629,10 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr,
C = getAndAdvanceChar(CurPtr, Result);
}
+ // If we are in C++11, lex the optional ud-suffix.
+ if (getFeatures().CPlusPlus0x)
+ CurPtr = LexUDSuffix(Result, CurPtr);
+
// If a nul character existed in the string, warn about it.
if (NulCharacter && !isLexingRawMode())
Diag(NulCharacter, diag::null_in_string);
@@ -1685,6 +1712,10 @@ void Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr,
}
}
+ // If we are in C++11, lex the optional ud-suffix.
+ if (getFeatures().CPlusPlus0x)
+ CurPtr = LexUDSuffix(Result, CurPtr);
+
// Update the location of token as well as BufferPtr.
const char *TokStart = BufferPtr;
FormTokenWithChars(Result, CurPtr, Kind);
@@ -1768,6 +1799,10 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr,
C = getAndAdvanceChar(CurPtr, Result);
}
+ // If we are in C++11, lex the optional ud-suffix.
+ if (getFeatures().CPlusPlus0x)
+ CurPtr = LexUDSuffix(Result, CurPtr);
+
// If a nul character existed in the character, warn about it.
if (NulCharacter && !isLexingRawMode())
Diag(NulCharacter, diag::null_in_char);
diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp
index 547bd4e0c8..e3ff77f4f0 100644
--- a/lib/Lex/LiteralSupport.cpp
+++ b/lib/Lex/LiteralSupport.cpp
@@ -731,7 +731,11 @@ NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) {
}
-/// character-literal: [C++0x lex.ccon]
+/// user-defined-character-literal: [C++11 lex.ext]
+/// character-literal ud-suffix
+/// ud-suffix:
+/// identifier
+/// character-literal: [C++11 lex.ccon]
/// ' c-char-sequence '
/// u' c-char-sequence '
/// U' c-char-sequence '
@@ -744,7 +748,7 @@ NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) {
/// backslash \, or new-line character
/// escape-sequence
/// universal-character-name
-/// escape-sequence: [C++0x lex.ccon]
+/// escape-sequence:
/// simple-escape-sequence
/// octal-escape-sequence
/// hexadecimal-escape-sequence
@@ -757,7 +761,7 @@ NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) {
/// hexadecimal-escape-sequence:
/// \x hexadecimal-digit
/// hexadecimal-escape-sequence hexadecimal-digit
-/// universal-character-name:
+/// universal-character-name: [C++11 lex.charset]
/// \u hex-quad
/// \U hex-quad hex-quad
/// hex-quad:
@@ -780,8 +784,17 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
assert(begin[0] == '\'' && "Invalid token lexed");
++begin;
+ // Remove an optional ud-suffix.
+ if (end[-1] != '\'') {
+ const char *UDSuffixEnd = end;
+ do {
+ --end;
+ } while (end[-1] != '\'');
+ UDSuffixBuf.assign(end, UDSuffixEnd);
+ }
+
// Trim the ending quote.
- assert(end[-1] == '\'' && "Invalid token lexed");
+ assert(end != begin && "Invalid token lexed");
--end;
// FIXME: The "Value" is an uint64_t so we can handle char literals of
@@ -1071,6 +1084,8 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
Pascal = false;
+ SourceLocation UDSuffixTokLoc;
+
for (unsigned i = 0, e = NumStringToks; i != e; ++i) {
const char *ThisTokBuf = &TokenBuf[0];
// Get the spelling of the token, which eliminates trigraphs, etc. We know
@@ -1085,7 +1100,39 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
continue;
}
- const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1; // Skip end quote.
+ const char *ThisTokEnd = ThisTokBuf+ThisTokLen;
+
+ // Remove an optional ud-suffix.
+ if (ThisTokEnd[-1] != '"') {
+ const char *UDSuffixEnd = ThisTokEnd;
+ do {
+ --ThisTokEnd;
+ } while (ThisTokEnd[-1] != '"');
+
+ StringRef UDSuffix(ThisTokEnd, UDSuffixEnd - ThisTokEnd);
+
+ if (UDSuffixBuf.empty()) {
+ UDSuffixBuf.assign(UDSuffix);
+ UDSuffixTokLoc = StringToks[i].getLocation();
+ } else if (!UDSuffixBuf.equals(UDSuffix)) {
+ // C++11 [lex.ext]p8: At the end of phase 6, if a string literal is the
+ // result of a concatenation involving at least one user-defined-string-
+ // literal, all the participating user-defined-string-literals shall
+ // have the same ud-suffix.
+ if (Diags) {
+ SourceLocation TokLoc = StringToks[i].getLocation();
+ Diags->Report(TokLoc, diag::err_string_concat_mixed_suffix)
+ << UDSuffixBuf << UDSuffix
+ << SourceRange(UDSuffixTokLoc, UDSuffixTokLoc)
+ << SourceRange(TokLoc, TokLoc);
+ }
+ hadError = true;
+ }
+ }
+
+ // Strip the end quote.
+ --ThisTokEnd;
+
// TODO: Input character set mapping support.
// Skip marker for wide or unicode strings.