aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/clang/Frontend/PreprocessorOutputOptions.h2
-rw-r--r--include/clang/Lex/Preprocessor.h13
-rw-r--r--lib/Frontend/CompilerInstance.cpp2
-rw-r--r--lib/Frontend/CompilerInvocation.cpp46
-rw-r--r--lib/Lex/Lexer.cpp38
-rw-r--r--test/Lexer/unicode.c9
6 files changed, 88 insertions, 22 deletions
diff --git a/include/clang/Frontend/PreprocessorOutputOptions.h b/include/clang/Frontend/PreprocessorOutputOptions.h
index 9793aa6fa7..e273dd613d 100644
--- a/include/clang/Frontend/PreprocessorOutputOptions.h
+++ b/include/clang/Frontend/PreprocessorOutputOptions.h
@@ -25,7 +25,7 @@ public:
public:
PreprocessorOutputOptions() {
- ShowCPP = 1;
+ ShowCPP = 0;
ShowComments = 0;
ShowLineMarkers = 1;
ShowMacroComments = 0;
diff --git a/include/clang/Lex/Preprocessor.h b/include/clang/Lex/Preprocessor.h
index 24b6a18881..fc092e14e9 100644
--- a/include/clang/Lex/Preprocessor.h
+++ b/include/clang/Lex/Preprocessor.h
@@ -160,6 +160,9 @@ class Preprocessor : public RefCountedBase<Preprocessor> {
/// \brief True if pragmas are enabled.
bool PragmasEnabled : 1;
+ /// \brief True if the preprocessor itself is responsible for producing output.
+ bool PreprocessedOutput : 1;
+
/// \brief True if we are currently preprocessing a #if or #elif directive
bool ParsingIfOrElifDirective;
@@ -474,6 +477,16 @@ public:
return SuppressIncludeNotFoundError;
}
+ /// \brief Record whether this preprocessor is responsible for producing
+ /// output (true) or produces tokens to be consumed by Parse and Sema (false).
+ void setPreprocessedOutput(bool IsPreprocessedOutput) {
+ PreprocessedOutput = IsPreprocessedOutput;
+ }
+
+ /// \brief Return the flag stored by setPreprocessedOutput(): true if the
+ /// preprocessor generates output, false if its tokens go to Parse and Sema.
+ bool isPreprocessedOutput() const { return PreprocessedOutput; }
+
/// isCurrentLexer - Return true if we are lexing directly from the specified
/// lexer.
bool isCurrentLexer(const PreprocessorLexer *L) const {
diff --git a/lib/Frontend/CompilerInstance.cpp b/lib/Frontend/CompilerInstance.cpp
index a7f0770ec7..27f96b0974 100644
--- a/lib/Frontend/CompilerInstance.cpp
+++ b/lib/Frontend/CompilerInstance.cpp
@@ -243,6 +243,8 @@ void CompilerInstance::createPreprocessor() {
InitializePreprocessor(*PP, PPOpts, getHeaderSearchOpts(), getFrontendOpts());
+ PP->setPreprocessedOutput(getPreprocessorOutputOpts().ShowCPP);
+
// Set up the module path, including the hash for the
// module-creation options.
SmallString<256> SpecificModuleCache(
diff --git a/lib/Frontend/CompilerInvocation.cpp b/lib/Frontend/CompilerInvocation.cpp
index f49f30d878..b4b0ddb1c4 100644
--- a/lib/Frontend/CompilerInvocation.cpp
+++ b/lib/Frontend/CompilerInvocation.cpp
@@ -1395,9 +1395,48 @@ static void ParsePreprocessorArgs(PreprocessorOptions &Opts, ArgList &Args,
}
static void ParsePreprocessorOutputArgs(PreprocessorOutputOptions &Opts,
- ArgList &Args) {
+ ArgList &Args,
+ frontend::ActionKind Action) {
using namespace options;
- Opts.ShowCPP = !Args.hasArg(OPT_dM);
+
+ switch (Action) {
+ case frontend::ASTDeclList:
+ case frontend::ASTDump:
+ case frontend::ASTDumpXML:
+ case frontend::ASTPrint:
+ case frontend::ASTView:
+ case frontend::EmitAssembly:
+ case frontend::EmitBC:
+ case frontend::EmitHTML:
+ case frontend::EmitLLVM:
+ case frontend::EmitLLVMOnly:
+ case frontend::EmitCodeGenOnly:
+ case frontend::EmitObj:
+ case frontend::FixIt:
+ case frontend::GenerateModule:
+ case frontend::GeneratePCH:
+ case frontend::GeneratePTH:
+ case frontend::ParseSyntaxOnly:
+ case frontend::PluginAction:
+ case frontend::PrintDeclContext:
+ case frontend::RewriteObjC:
+ case frontend::RewriteTest:
+ case frontend::RunAnalysis:
+ case frontend::MigrateSource:
+ Opts.ShowCPP = 0;
+ break;
+
+ case frontend::DumpRawTokens:
+ case frontend::DumpTokens:
+ case frontend::InitOnly:
+ case frontend::PrintPreamble:
+ case frontend::PrintPreprocessedInput:
+ case frontend::RewriteMacros:
+ case frontend::RunPreprocessorOnly:
+ Opts.ShowCPP = !Args.hasArg(OPT_dM);
+ break;
+ }
+
Opts.ShowComments = Args.hasArg(OPT_C);
Opts.ShowLineMarkers = !Args.hasArg(OPT_P);
Opts.ShowMacroComments = Args.hasArg(OPT_CC);
@@ -1478,7 +1517,8 @@ bool CompilerInvocation::CreateFromArgs(CompilerInvocation &Res,
// parameters from the function and the "FileManager.h" #include.
FileManager FileMgr(Res.getFileSystemOpts());
ParsePreprocessorArgs(Res.getPreprocessorOpts(), *Args, FileMgr, Diags);
- ParsePreprocessorOutputArgs(Res.getPreprocessorOutputOpts(), *Args);
+ ParsePreprocessorOutputArgs(Res.getPreprocessorOutputOpts(), *Args,
+ Res.getFrontendOpts().ProgramAction);
ParseTargetArgs(Res.getTargetOpts(), *Args);
return Success;
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index 3e3aaae5f5..08f406b069 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -2811,14 +2811,13 @@ static bool isUnicodeWhitespace(uint32_t C) {
}
void Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) {
- if (isUnicodeWhitespace(C)) {
- if (!isLexingRawMode()) {
- CharSourceRange CharRange =
- CharSourceRange::getCharRange(getSourceLocation(),
- getSourceLocation(CurPtr));
- Diag(BufferPtr, diag::ext_unicode_whitespace)
- << CharRange;
- }
+ if (!isLexingRawMode() && !PP->isPreprocessedOutput() &&
+ isUnicodeWhitespace(C)) {
+ CharSourceRange CharRange =
+ CharSourceRange::getCharRange(getSourceLocation(),
+ getSourceLocation(CurPtr));
+ Diag(BufferPtr, diag::ext_unicode_whitespace)
+ << CharRange;
Result.setFlag(Token::LeadingSpace);
if (SkipWhitespace(Result, CurPtr))
@@ -2832,7 +2831,8 @@ void Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) {
return LexIdentifier(Result, CurPtr);
}
- if (!isASCII(*BufferPtr) && !isAllowedIDChar(C)) {
+ if (!isLexingRawMode() && !PP->isPreprocessedOutput() &&
+ !isASCII(*BufferPtr) && !isAllowedIDChar(C)) {
// Non-ASCII characters tend to creep into source code unintentionally.
// Instead of letting the parser complain about the unknown token,
// just drop the character.
@@ -2842,13 +2842,11 @@ void Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) {
// loophole in the mapping of Unicode characters to basic character set
// characters that allows us to map these particular characters to, say,
// whitespace.
- if (!isLexingRawMode()) {
- CharSourceRange CharRange =
- CharSourceRange::getCharRange(getSourceLocation(),
- getSourceLocation(CurPtr));
- Diag(BufferPtr, diag::err_non_ascii)
- << FixItHint::CreateRemoval(CharRange);
- }
+ CharSourceRange CharRange =
+ CharSourceRange::getCharRange(getSourceLocation(),
+ getSourceLocation(CurPtr));
+ Diag(BufferPtr, diag::err_non_ascii)
+ << FixItHint::CreateRemoval(CharRange);
BufferPtr = CurPtr;
return LexTokenInternal(Result);
@@ -3537,11 +3535,15 @@ LexNextToken:
if (Status == conversionOK)
return LexUnicode(Result, CodePoint, CurPtr);
+ if (isLexingRawMode() || PP->isPreprocessedOutput()) {
+ Kind = tok::unknown;
+ break;
+ }
+
// Non-ASCII characters tend to creep into source code unintentionally.
// Instead of letting the parser complain about the unknown token,
// just diagnose the invalid UTF-8, then drop the character.
- if (!isLexingRawMode())
- Diag(CurPtr, diag::err_invalid_utf8);
+ Diag(CurPtr, diag::err_invalid_utf8);
BufferPtr = CurPtr+1;
goto LexNextToken;
diff --git a/test/Lexer/unicode.c b/test/Lexer/unicode.c
index 1d7b53e2c5..26e77f61fd 100644
--- a/test/Lexer/unicode.c
+++ b/test/Lexer/unicode.c
@@ -1,6 +1,15 @@
// RUN: %clang_cc1 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -E -DPP_ONLY=1 %s -o %t
+// RUN: FileCheck --strict-whitespace --input-file=%t %s
// This file contains Unicode characters; please do not "fix" them!
extern int x; // expected-warning {{treating Unicode character as whitespace}}
extern int x; // expected-warning {{treating Unicode character as whitespace}}
+
+// CHECK: extern int {{x}}
+// CHECK: extern int {{x}}
+
+#if PP_ONLY
+CHECK: The preprocessor should not complain about Unicode characters like ©.
+#endif