aboutsummaryrefslogtreecommitdiff
path: root/lib/Basic/ConvertUTF.c
diff options
context:
space:
mode:
authorEli Friedman <eli.friedman@gmail.com>2011-11-01 02:10:54 +0000
committerEli Friedman <eli.friedman@gmail.com>2011-11-01 02:10:54 +0000
commit436ecd959954db0e11c8daf64b3d6b6b6d0eba55 (patch)
treeed27403865d675212a96d7e4ecf37a88252aab24 /lib/Basic/ConvertUTF.c
parentf358c8af7bc0ed58fc1f009aabe928732d3d2316 (diff)
Move ConvertUTF8toUTF32 out of #if 0, in preparation for a patch which needs it.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@143415 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Basic/ConvertUTF.c')
-rw-r--r--lib/Basic/ConvertUTF.c123
1 files changed, 62 insertions, 61 deletions
diff --git a/lib/Basic/ConvertUTF.c b/lib/Basic/ConvertUTF.c
index 124e386c55..b3fa916934 100644
--- a/lib/Basic/ConvertUTF.c
+++ b/lib/Basic/ConvertUTF.c
@@ -339,67 +339,6 @@ ConversionResult ConvertUTF32toUTF8 (
return result;
}
-/* --------------------------------------------------------------------- */
-
-ConversionResult ConvertUTF8toUTF32 (
- const UTF8** sourceStart, const UTF8* sourceEnd,
- UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
- ConversionResult result = conversionOK;
- const UTF8* source = *sourceStart;
- UTF32* target = *targetStart;
- while (source < sourceEnd) {
- UTF32 ch = 0;
- unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
- if (source + extraBytesToRead >= sourceEnd) {
- result = sourceExhausted; break;
- }
- /* Do this check whether lenient or strict */
- if (!isLegalUTF8(source, extraBytesToRead+1)) {
- result = sourceIllegal;
- break;
- }
- /*
- * The cases all fall through. See "Note A" below.
- */
- switch (extraBytesToRead) {
- case 5: ch += *source++; ch <<= 6;
- case 4: ch += *source++; ch <<= 6;
- case 3: ch += *source++; ch <<= 6;
- case 2: ch += *source++; ch <<= 6;
- case 1: ch += *source++; ch <<= 6;
- case 0: ch += *source++;
- }
- ch -= offsetsFromUTF8[extraBytesToRead];
-
- if (target >= targetEnd) {
- source -= (extraBytesToRead+1); /* Back up the source pointer! */
- result = targetExhausted; break;
- }
- if (ch <= UNI_MAX_LEGAL_UTF32) {
- /*
- * UTF-16 surrogate values are illegal in UTF-32, and anything
- * over Plane 17 (> 0x10FFFF) is illegal.
- */
- if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
- if (flags == strictConversion) {
- source -= (extraBytesToRead+1); /* return to the illegal value itself */
- result = sourceIllegal;
- break;
- } else {
- *target++ = UNI_REPLACEMENT_CHAR;
- }
- } else {
- *target++ = ch;
- }
- } else { /* i.e., ch > UNI_MAX_LEGAL_UTF32 */
- result = sourceIllegal;
- *target++ = UNI_REPLACEMENT_CHAR;
- }
- }
- *sourceStart = source;
- *targetStart = target;
- return result;
-}
#endif
/* --------------------------------------------------------------------- */
@@ -527,6 +466,68 @@ ConversionResult ConvertUTF8toUTF16 (
return result;
}
+/* --------------------------------------------------------------------- */
+
+ConversionResult ConvertUTF8toUTF32 (
+ const UTF8** sourceStart, const UTF8* sourceEnd,
+ UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
+ ConversionResult result = conversionOK;
+ const UTF8* source = *sourceStart;
+ UTF32* target = *targetStart;
+ while (source < sourceEnd) {
+ UTF32 ch = 0;
+ unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
+ if (source + extraBytesToRead >= sourceEnd) {
+ result = sourceExhausted; break;
+ }
+ /* Do this check whether lenient or strict */
+ if (!isLegalUTF8(source, extraBytesToRead+1)) {
+ result = sourceIllegal;
+ break;
+ }
+ /*
+ * The cases all fall through. See "Note A" below.
+ */
+ switch (extraBytesToRead) {
+ case 5: ch += *source++; ch <<= 6;
+ case 4: ch += *source++; ch <<= 6;
+ case 3: ch += *source++; ch <<= 6;
+ case 2: ch += *source++; ch <<= 6;
+ case 1: ch += *source++; ch <<= 6;
+ case 0: ch += *source++;
+ }
+ ch -= offsetsFromUTF8[extraBytesToRead];
+
+ if (target >= targetEnd) {
+ source -= (extraBytesToRead+1); /* Back up the source pointer! */
+ result = targetExhausted; break;
+ }
+ if (ch <= UNI_MAX_LEGAL_UTF32) {
+ /*
+ * UTF-16 surrogate values are illegal in UTF-32, and anything
+ * over Plane 17 (> 0x10FFFF) is illegal.
+ */
+ if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
+ if (flags == strictConversion) {
+ source -= (extraBytesToRead+1); /* return to the illegal value itself */
+ result = sourceIllegal;
+ break;
+ } else {
+ *target++ = UNI_REPLACEMENT_CHAR;
+ }
+ } else {
+ *target++ = ch;
+ }
+ } else { /* i.e., ch > UNI_MAX_LEGAL_UTF32 */
+ result = sourceIllegal;
+ *target++ = UNI_REPLACEMENT_CHAR;
+ }
+ }
+ *sourceStart = source;
+ *targetStart = target;
+ return result;
+}
+
/* ---------------------------------------------------------------------
Note A.