diff options
author | Sean Hunt <scshunt@csclub.uwaterloo.ca> | 2012-04-07 00:37:53 +0000 |
---|---|---|
committer | Sean Hunt <scshunt@csclub.uwaterloo.ca> | 2012-04-07 00:37:53 +0000 |
commit | 3420e7f360dab7712a9ec4f51d233c7e73642ec7 (patch) | |
tree | 36bcbb118ac482eacb56c76f2bcd6985fb3f8a7b | |
parent | d4f020a3af325630973df8d3a084d0b0e3b68ebc (diff) |
Output UTF-8-encoded characters as identifier characters into assembly
by default.
This is a behaviour configurable in the MCAsmInfo. I've decided to turn
it on by default in (possibly optimistic) hopes that most assemblers are
reasonably sane. If this proves to be a problem, switching the default seems
reasonable.
I'm not sure if this is the opportune place to test, but it seemed good
to make sure it was tested somewhere.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@154235 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | include/llvm/MC/MCAsmInfo.h | 7 | ||||
-rw-r--r-- | lib/MC/MCAsmInfo.cpp | 1 | ||||
-rw-r--r-- | lib/Target/Mangler.cpp | 11 | ||||
-rw-r--r-- | test/CodeGen/X86/utf8.ll | 4 |
4 files changed, 19 insertions, 4 deletions
diff --git a/include/llvm/MC/MCAsmInfo.h b/include/llvm/MC/MCAsmInfo.h index 095ca14c1b..0f67c99371 100644 --- a/include/llvm/MC/MCAsmInfo.h +++ b/include/llvm/MC/MCAsmInfo.h @@ -143,6 +143,10 @@ namespace llvm { /// symbol names. This defaults to true. bool AllowPeriodsInName; + /// AllowUTF8 - This is true if the assembler accepts UTF-8 input. + // FIXME: Make this a more general encoding setting? + bool AllowUTF8; + //===--- Data Emission Directives -------------------------------------===// /// ZeroDirective - this should be set to the directive used to get some @@ -485,6 +489,9 @@ namespace llvm { bool doesAllowPeriodsInName() const { return AllowPeriodsInName; } + bool doesAllowUTF8() const { + return AllowUTF8; + } const char *getZeroDirective() const { return ZeroDirective; } diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp index 582d21fe90..8286c1dfea 100644 --- a/lib/MC/MCAsmInfo.cpp +++ b/lib/MC/MCAsmInfo.cpp @@ -49,6 +49,7 @@ MCAsmInfo::MCAsmInfo() { AllowQuotesInName = false; AllowNameToStartWithDigit = false; AllowPeriodsInName = true; + AllowUTF8 = true; ZeroDirective = "\t.zero\t"; AsciiDirective = "\t.ascii\t"; AscizDirective = "\t.asciz\t"; diff --git a/lib/Target/Mangler.cpp b/lib/Target/Mangler.cpp index 53ad155f37..786a0c5ed1 100644 --- a/lib/Target/Mangler.cpp +++ b/lib/Target/Mangler.cpp @@ -22,12 +22,13 @@ #include "llvm/ADT/Twine.h" using namespace llvm; -static bool isAcceptableChar(char C, bool AllowPeriod) { +static bool isAcceptableChar(char C, bool AllowPeriod, bool AllowUTF8) { if ((C < 'a' || C > 'z') && (C < 'A' || C > 'Z') && (C < '0' || C > '9') && C != '_' && C != '$' && C != '@' && - !(AllowPeriod && C == '.')) + !(AllowPeriod && C == '.') && + !(AllowUTF8 && (C & 0x80))) return false; return true; } @@ -56,8 +57,9 @@ static bool NameNeedsEscaping(StringRef Str, const MCAsmInfo &MAI) { // If any of the characters in the string is an unacceptable character, force // quotes. 
bool AllowPeriod = MAI.doesAllowPeriodsInName(); + bool AllowUTF8 = MAI.doesAllowUTF8(); for (unsigned i = 0, e = Str.size(); i != e; ++i) - if (!isAcceptableChar(Str[i], AllowPeriod)) + if (!isAcceptableChar(Str[i], AllowPeriod, AllowUTF8)) return true; return false; } @@ -74,8 +76,9 @@ static void appendMangledName(SmallVectorImpl<char> &OutName, StringRef Str, } bool AllowPeriod = MAI.doesAllowPeriodsInName(); + bool AllowUTF8 = MAI.doesAllowUTF8(); for (unsigned i = 0, e = Str.size(); i != e; ++i) { - if (!isAcceptableChar(Str[i], AllowPeriod)) + if (!isAcceptableChar(Str[i], AllowPeriod, AllowUTF8)) MangleLetter(OutName, Str[i]); else OutName.push_back(Str[i]); diff --git a/test/CodeGen/X86/utf8.ll b/test/CodeGen/X86/utf8.ll new file mode 100644 index 0000000000..8a75c2ebce --- /dev/null +++ b/test/CodeGen/X86/utf8.ll @@ -0,0 +1,4 @@ +; RUN: llc < %s -march=x86 | FileCheck %s + +; CHECK: "iΔ",4,4 +@"i\CE\94" = common global i32 0, align 4 |