Re-apply r160319 "Don't crash when emitting fixits following Unicode chars"

This time, make sure we don't try to print fixits with newline characters, since they don't have a valid column width, and they don't look good anyway. PR13417 (and originally <rdar://problem/11877454>) git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@160561 91177308-0d34-0410-b5e6-96231b3b80d8
author: Jordan Rose <jordan_rose@apple.com> 2012-07-20 18:50:51 +0000
committer: Jordan Rose <jordan_rose@apple.com> 2012-07-20 18:50:51 +0000
commit: bbe0175255d4da42cd99d93ca1e60c8eabcb4b9a (patch)
tree: d8ea7354233710c07d00f39a5c37394fa27522aa
parent: fd8b43596478b779b6777cb3a595e69d7856c378 (diff)
3 files changed, 57 insertions, 30 deletions
diff --git a/docs/UsersManual.html b/docs/UsersManual.html
index 7a0e325c03..629d9b2574 100644
--- a/docs/UsersManual.html
+++ b/docs/UsersManual.html
@@ -230,6 +230,9 @@ print something like:
 
 <p>When this is disabled, Clang will print "test.c:28: warning..." with no
 column number.</p>
+
+<p>The printed column numbers count bytes from the beginning of the line; take
+care if your source contains multibyte characters.</p>
 </dd>
 
 <!-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -->
@@ -396,6 +399,9 @@ exprs.c:47:15:{47:8-47:14}{47:17-47:24}: error: invalid operands to binary expre
 </pre>
 
 <p>The {}'s are generated by -fdiagnostics-print-source-range-info.</p>
+
+<p>The printed column numbers count bytes from the beginning of the line; take
+care if your source contains multibyte characters.</p>
 </dd>
 
 <!-- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -->
@@ -416,6 +422,9 @@ respectively). Both the file name and the insertion string escape backslash (as
 &quot;\\&quot;), tabs (as &quot;\t&quot;), newlines (as &quot;\n&quot;), double
 quotes(as &quot;\&quot;&quot;) and non-printable characters (as octal
 &quot;\xxx&quot;).</p>
+
+<p>The printed column numbers count bytes from the beginning of the line; take
+care if your source contains multibyte characters.</p>
 </dd>
 
 <dt id="opt_fno-elide-type">
diff --git a/lib/Frontend/TextDiagnostic.cpp b/lib/Frontend/TextDiagnostic.cpp
index 306306d3ac..1f2e2c05c5 100644
--- a/lib/Frontend/TextDiagnostic.cpp
+++ b/lib/Frontend/TextDiagnostic.cpp
@@ -1124,23 +1124,28 @@ std::string TextDiagnostic::buildFixItInsertionLine(
   std::string FixItInsertionLine;
   if (Hints.empty() || !DiagOpts.ShowFixits)
     return FixItInsertionLine;
-  unsigned PrevHintEnd = 0;
+  unsigned PrevHintEndCol = 0;
 
   for (ArrayRef<FixItHint>::iterator I = Hints.begin(), E = Hints.end();
        I != E; ++I) {
     if (!I->CodeToInsert.empty()) {
       // We have an insertion hint. Determine whether the inserted
-      // code is on the same line as the caret.
+      // code contains no newlines and is on the same line as the caret.
       std::pair<FileID, unsigned> HintLocInfo
         = SM.getDecomposedExpansionLoc(I->RemoveRange.getBegin());
-      if (LineNo == SM.getLineNumber(HintLocInfo.first, HintLocInfo.second)) {
+      if (LineNo == SM.getLineNumber(HintLocInfo.first, HintLocInfo.second) &&
+          StringRef(I->CodeToInsert).find_first_of("\n\r") == StringRef::npos) {
         // Insert the new code into the line just below the code
         // that the user wrote.
-        unsigned HintColNo
+        // Note: When modifying this function, be very careful about what is a
+        // "column" (printed width, platform-dependent) and what is a
+        // "byte offset" (SourceManager "column").
+        unsigned HintByteOffset
           = SM.getColumnNumber(HintLocInfo.first, HintLocInfo.second) - 1;
-        // hint must start inside the source or right at the end
-        assert(HintColNo<static_cast<unsigned>(map.bytes())+1);
-        HintColNo = map.byteToColumn(HintColNo);
+
+        // The hint must start inside the source or right at the end
+        assert(HintByteOffset < static_cast<unsigned>(map.bytes())+1);
+        unsigned HintCol = map.byteToColumn(HintByteOffset);
 
         // If we inserted a long previous hint, push this one forwards, and add
         // an extra space to show that this is not part of the previous
@@ -1149,32 +1154,27 @@ std::string TextDiagnostic::buildFixItInsertionLine(
         //
         // Note that if this hint is located immediately after the previous
         // hint, no space will be added, since the location is more important.
-        if (HintColNo < PrevHintEnd)
-          HintColNo = PrevHintEnd + 1;
-
-        // FIXME: if the fixit includes tabs or other characters that do not
-        //  take up a single column per byte when displayed then
-        //  I->CodeToInsert.size() is not a column number and we're mixing
-        //  units (columns + bytes). We should get printable versions
-        //  of each fixit before using them.
-        unsigned LastColumnModified
-          = HintColNo + I->CodeToInsert.size();
-
-        if (LastColumnModified <= static_cast<unsigned>(map.bytes())) {
-          // If we're right in the middle of a multibyte character skip to
-          // the end of it.
-          while (map.byteToColumn(LastColumnModified) == -1)
-            ++LastColumnModified;
-          LastColumnModified = map.byteToColumn(LastColumnModified);
-        }
-
+        if (HintCol < PrevHintEndCol)
+          HintCol = PrevHintEndCol + 1;
+
+        // FIXME: This function handles multibyte characters in the source, but
+        // not in the fixits. This assertion is intended to catch unintended
+        // use of multibyte characters in fixits. If we decide to do this, we'll
+        // have to track separate byte widths for the source and fixit lines.
+        assert((size_t)llvm::sys::locale::columnWidth(I->CodeToInsert) ==
+               I->CodeToInsert.size());
+
+        // This relies on one byte per column in our fixit hints.
+        // This should NOT use HintByteOffset, because the source might have
+        // Unicode characters in earlier columns.
+        unsigned LastColumnModified = HintCol + I->CodeToInsert.size();
         if (LastColumnModified > FixItInsertionLine.size())
           FixItInsertionLine.resize(LastColumnModified, ' ');
-        assert(HintColNo+I->CodeToInsert.size() <= FixItInsertionLine.size());
+
         std::copy(I->CodeToInsert.begin(), I->CodeToInsert.end(),
-                  FixItInsertionLine.begin() + HintColNo);
+                  FixItInsertionLine.begin() + HintCol);
 
-        PrevHintEnd = LastColumnModified;
+        PrevHintEndCol = LastColumnModified;
       } else {
         FixItInsertionLine.clear();
         break;
diff --git a/test/FixIt/fixit-unicode.c b/test/FixIt/fixit-unicode.c
index d8e4592336..2af5e08faa 100644
--- a/test/FixIt/fixit-unicode.c
+++ b/test/FixIt/fixit-unicode.c
@@ -1,10 +1,11 @@
 // RUN: %clang_cc1 -fsyntax-only %s 2>&1 | FileCheck -strict-whitespace %s
-// PR13312
+// RUN: %clang_cc1 -fsyntax-only -fdiagnostics-parseable-fixits %s 2>&1 | FileCheck -check-prefix=CHECK-MACHINE %s
 
 struct Foo {
   int bar;
 };
 
+// PR13312
 void test1() {
   struct Foo foo;
   (&foo)☃>bar = 42;
@@ -12,4 +13,21 @@ void test1() {
 // Make sure we emit the fixit right in front of the snowman.
 // CHECK: {{^        \^}}
 // CHECK: {{^        ;}}
+
+// CHECK-MACHINE: fix-it:"{{.*}}fixit-unicode.c":{11:9-11:9}:";"
+}
+
+
+int printf(const char *, ...);
+void test2() {
+  printf("∆: %d", 1L);
+// CHECK: warning: format specifies type 'int' but the argument has type 'long'
+// Don't crash emitting a fixit after the delta.
+// CHECK:  printf("
+// CHECK: : %d", 1L);
+// Unfortunately, we can't actually check the location of the printed fixit,
+// because different systems will render the delta differently (either as a
+// character, or as <U+2206>.) The fixit should line up with the %d regardless.
+
+// CHECK-MACHINE: fix-it:"{{.*}}fixit-unicode.c":{23:16-23:18}:"%ld"
 }
author	Jordan Rose <jordan_rose@apple.com>	2012-07-20 18:50:51 +0000
committer	Jordan Rose <jordan_rose@apple.com>	2012-07-20 18:50:51 +0000
commit	bbe0175255d4da42cd99d93ca1e60c8eabcb4b9a (patch)
tree	d8ea7354233710c07d00f39a5c37394fa27522aa
parent	fd8b43596478b779b6777cb3a595e69d7856c378 (diff)