diff options
author | Dmitri Gribenko <gribozavr@gmail.com> | 2012-07-27 20:36:22 +0000 |
---|---|---|
committer | Dmitri Gribenko <gribozavr@gmail.com> | 2012-07-27 20:36:22 +0000 |
commit | e43031344b17cdb345957c4acc9d39d26f86efd3 (patch) | |
tree | 924b7e1a9a005f1ceb6a10cd0042d8aca8dbb415 | |
parent | 4684778993c667246039b4664acbce59dc99440c (diff) |
Add a function to convert a single Unicode code point to a UTF8 sequence.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@160890 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | include/clang/Basic/ConvertUTF.h | 16 | ||||
-rw-r--r-- | lib/Basic/ConvertUTF.c | 3 | ||||
-rw-r--r-- | lib/Basic/ConvertUTFWrapper.cpp | 16 |
3 files changed, 33 insertions, 2 deletions
diff --git a/include/clang/Basic/ConvertUTF.h b/include/clang/Basic/ConvertUTF.h
index 53d451485d..e7cfa8a767 100644
--- a/include/clang/Basic/ConvertUTF.h
+++ b/include/clang/Basic/ConvertUTF.h
@@ -110,6 +110,8 @@ typedef unsigned char Boolean; /* 0 or 1 */
 #define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF
 #define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF
 
+#define UNI_MAX_UTF8_BYTES_PER_CODE_POINT 4
+
 typedef enum {
   conversionOK,           /* conversion successful */
   sourceExhausted,        /* partial character in source, but hit end */
@@ -139,11 +141,13 @@ ConversionResult ConvertUTF8toUTF32 (
 ConversionResult ConvertUTF16toUTF8 (
   const UTF16** sourceStart, const UTF16* sourceEnd,
   UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags);
+#endif
 
 ConversionResult ConvertUTF32toUTF8 (
   const UTF32** sourceStart, const UTF32* sourceEnd,
   UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags);
 
+#ifdef CLANG_NEEDS_THESE_ONE_DAY
 ConversionResult ConvertUTF16toUTF32 (
   const UTF16** sourceStart, const UTF16* sourceEnd,
   UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags);
@@ -177,6 +181,18 @@ namespace clang {
 bool ConvertUTF8toWide(unsigned WideCharWidth, llvm::StringRef Source,
                        char *&ResultPtr);
 
+/**
+ * Convert a Unicode code point to a UTF8 sequence.
+ *
+ * \param Source a Unicode code point.
+ * \param [in,out] ResultPtr pointer to the output buffer, needs to be at least
+ * \c UNI_MAX_UTF8_BYTES_PER_CODE_POINT bytes. On success \c ResultPtr is
+ * updated one past end of the converted sequence.
+ *
+ * \returns true on success.
+ */
+bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr);
+
 }
 
 #endif
diff --git a/lib/Basic/ConvertUTF.c b/lib/Basic/ConvertUTF.c
index e1970039e1..4793b251f6 100644
--- a/lib/Basic/ConvertUTF.c
+++ b/lib/Basic/ConvertUTF.c
@@ -285,6 +285,7 @@ ConversionResult ConvertUTF16toUTF8 (
     *targetStart = target;
     return result;
 }
+#endif
 
 /* --------------------------------------------------------------------- */
 
@@ -339,8 +340,6 @@ ConversionResult ConvertUTF32toUTF8 (
     return result;
 }
 
-#endif
-
 /* --------------------------------------------------------------------- */
 
 /*
diff --git a/lib/Basic/ConvertUTFWrapper.cpp b/lib/Basic/ConvertUTFWrapper.cpp
index 42b4f58079..a1b3f7fd9d 100644
--- a/lib/Basic/ConvertUTFWrapper.cpp
+++ b/lib/Basic/ConvertUTFWrapper.cpp
@@ -51,4 +51,20 @@ bool ConvertUTF8toWide(unsigned WideCharWidth, llvm::StringRef Source,
   return result == conversionOK;
 }
 
+bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr) {
+  const UTF32 *SourceStart = &Source;
+  const UTF32 *SourceEnd = SourceStart + 1;
+  UTF8 *TargetStart = reinterpret_cast<UTF8 *>(ResultPtr);
+  UTF8 *TargetEnd = TargetStart + UNI_MAX_UTF8_BYTES_PER_CODE_POINT;
+  ConversionResult CR = ConvertUTF32toUTF8(&SourceStart, SourceEnd,
+                                           &TargetStart, TargetEnd,
+                                           strictConversion);
+  if (CR != conversionOK)
+    return false;
+
+  ResultPtr = reinterpret_cast<char*>(TargetStart);
+  return true;
 }
+
+} // end namespace clang
+