diff options
author | Chris Lattner <sabre@nondot.org> | 2009-04-18 22:28:58 +0000 |
---|---|---|
committer | Chris Lattner <sabre@nondot.org> | 2009-04-18 22:28:58 +0000 |
commit | 88e2524b8f3393975872a23bbe6e8b3cf50d6773 (patch) | |
tree | d499c0691084bf6d50de687da6ef9db7c9ca5a83 /lib/Lex/Preprocessor.cpp | |
parent | 033749571f8d4c804eeb357c70b06424aa24503b (diff) |
Change Preprocessor::AdvanceToTokenCharacter to stop at
the first real character of a token. For example, advancing
by 3 characters within the token foo\<newline>bar, i.e.
foo\
bar
(one token split by an escaped newline) should stop at the b, not the \.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@69484 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Lex/Preprocessor.cpp')
-rw-r--r-- | lib/Lex/Preprocessor.cpp | 33 |
1 files changed, 21 insertions, 12 deletions
diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp index 70294417d1..07fab81137 100644 --- a/lib/Lex/Preprocessor.cpp +++ b/lib/Lex/Preprocessor.cpp @@ -292,33 +292,42 @@ void Preprocessor::CreateString(const char *Buf, unsigned Len, Token &Tok, /// token, return a new location that specifies a character within the token. SourceLocation Preprocessor::AdvanceToTokenCharacter(SourceLocation TokStart, unsigned CharNo) { - // If they request the first char of the token, we're trivially done. - if (CharNo == 0) return TokStart; - // Figure out how many physical characters away the specified instantiation // character is. This needs to take into consideration newlines and // trigraphs. const char *TokPtr = SourceMgr.getCharacterData(TokStart); + + // If they request the first char of the token, we're trivially done. + if (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr)) + return TokStart; + unsigned PhysOffset = 0; // The usual case is that tokens don't contain anything interesting. Skip // over the uninteresting characters. If a token only consists of simple // chars, this method is extremely fast. - while (CharNo && Lexer::isObviouslySimpleCharacter(*TokPtr)) + while (Lexer::isObviouslySimpleCharacter(*TokPtr)) { + if (CharNo == 0) + return TokStart.getFileLocWithOffset(PhysOffset); ++TokPtr, --CharNo, ++PhysOffset; + } // If we have a character that may be a trigraph or escaped newline, use a // lexer to parse it correctly. - if (CharNo != 0) { - // Skip over the remaining characters. - for (; CharNo; --CharNo) { - unsigned Size; - Lexer::getCharAndSizeNoWarn(TokPtr, Size, Features); - TokPtr += Size; - PhysOffset += Size; - } + for (; CharNo; --CharNo) { + unsigned Size; + Lexer::getCharAndSizeNoWarn(TokPtr, Size, Features); + TokPtr += Size; + PhysOffset += Size; } + // Final detail: if we end up on an escaped newline, we want to return the + // location of the actual byte of the token. 
For example foo\<newline>bar + // advanced by 3 should return the location of b, not of \\. One compounding + // detail of this is that the escape may be made by a trigraph. + if (!Lexer::isObviouslySimpleCharacter(*TokPtr)) + PhysOffset = Lexer::SkipEscapedNewLines(TokPtr)-TokPtr; + return TokStart.getFileLocWithOffset(PhysOffset); } |