aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Seth Cantrell <seth.cantrell@gmail.com> 2012-01-18 12:27:10 +0000
committer Seth Cantrell <seth.cantrell@gmail.com> 2012-01-18 12:27:10 +0000
commit 7748cbc97ff9c6c3940549d30965a10b47a45ee8 (patch)
tree 6221bd3972fa668a58b31ec1e8470e65a63f3ce7
parent 5393e213f1d86a64e8e6b549c0ef76f9286ce279 (diff)
Add and update tests for character literals
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@148392 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- test/CodeGen/char-literal.c 49
-rw-r--r-- test/CodeGen/string-literal-short-wstring.c 11
-rw-r--r-- test/Lexer/char-literal-encoding-error.c 10
-rw-r--r-- test/Lexer/char-literal.cpp 24
-rw-r--r-- test/Lexer/constants.c 2
-rw-r--r-- test/Lexer/utf8-char-literal.cpp 5
-rw-r--r-- test/Lexer/wchar.c 4
7 files changed, 56 insertions, 49 deletions
diff --git a/test/CodeGen/char-literal.c b/test/CodeGen/char-literal.c
index 5963ede392..5452392ea5 100644
--- a/test/CodeGen/char-literal.c
+++ b/test/CodeGen/char-literal.c
@@ -9,11 +9,26 @@ int main() {
// CHECK-CPP0X: store i8 97
char a = 'a';
- // Should pick second character.
+ // Should truncate value (equal to last character).
// CHECK-C: store i8 98
// CHECK-CPP0X: store i8 98
char b = 'ab';
+ // Should get concatenated characters
+ // CHECK-C: store i32 24930
+ // CHECK-CPP0X: store i32 24930
+ int b1 = 'ab';
+
+ // Should get concatenated characters
+ // CHECK-C: store i32 808464432
+ // CHECK-CPP0X: store i32 808464432
+ int b2 = '0000';
+
+ // Should get truncated value (last four characters concatenated)
+ // CHECK-C: store i32 1919512167
+ // CHECK-CPP0X: store i32 1919512167
+ int b3 = 'somesillylongstring';
+
// CHECK-C: store i32 97
// CHECK-CPP0X: store i32 97
wchar_t wa = L'a';
@@ -27,26 +42,11 @@ int main() {
// CHECK-CPP0X: store i16 97
char16_t ua = u'a';
- // Should pick second character.
- // CHECK-CPP0X: store i16 98
- char16_t ub = u'ab';
-
// CHECK-CPP0X: store i32 97
char32_t Ua = U'a';
- // Should pick second character.
- // CHECK-CPP0X: store i32 98
- char32_t Ub = U'ab';
#endif
- // Should pick last character and store its lowest byte.
- // This does not match gcc, which takes the last character, converts it to
- // utf8, and then picks the second-lowest byte of that (they probably store
- // the utf8 in uint16_ts internally and take the lower byte of that).
- // CHECK-C: store i8 48
- // CHECK-CPP0X: store i8 48
- char c = '\u1120\u0220\U00102030';
-
// CHECK-C: store i32 61451
// CHECK-CPP0X: store i32 61451
wchar_t wc = L'\uF00B';
@@ -65,13 +65,6 @@ int main() {
wchar_t wd = L'\U0010F00B';
#if __cplusplus >= 201103L
- // Should take lower word of the 4byte UNC sequence. This does not match
- // gcc. I don't understand what gcc does (it looks like it converts to utf16,
- // then takes the second (!) utf16 word, swaps the lower two nibbles, and
- // stores that?).
- // CHECK-CPP0X: store i16 -4085
- char16_t ud = u'\U0010F00B'; // has utf16 encoding dbc8 dcb0
-
// CHECK-CPP0X: store i32 1110027
char32_t Ud = U'\U0010F00B';
#endif
@@ -80,14 +73,4 @@ int main() {
// CHECK-C: store i32 1110027
// CHECK-CPP0X: store i32 1110027
wchar_t we = L'\u1234\U0010F00B';
-
-#if __cplusplus >= 201103L
- // Should pick second character.
- // CHECK-CPP0X: store i16 -4085
- char16_t ue = u'\u1234\U0010F00B';
-
- // Should pick second character.
- // CHECK-CPP0X: store i32 1110027
- char32_t Ue = U'\u1234\U0010F00B';
-#endif
}
diff --git a/test/CodeGen/string-literal-short-wstring.c b/test/CodeGen/string-literal-short-wstring.c
index 309ffd33b2..88e4a1e400 100644
--- a/test/CodeGen/string-literal-short-wstring.c
+++ b/test/CodeGen/string-literal-short-wstring.c
@@ -29,15 +29,4 @@ int main() {
// -4085 == 0xf00b
// CHECK: store i16 -4085
wchar_t wc = L'\uF00B';
-
- // Should take lower word of the 4byte UNC sequence. This does not match
- // gcc. I don't understand what gcc does (it looks like it converts to utf16,
- // then takes the second (!) utf16 word, swaps the lower two nibbles, and
- // stores that?).
- // CHECK: store i16 -4085
- wchar_t wd = L'\U0010F00B'; // has utf16 encoding dbc8 dcb0
-
- // Should pick second character. (gcc: -9205)
- // CHECK: store i16 -4085
- wchar_t we = L'\u1234\U0010F00B';
}
diff --git a/test/Lexer/char-literal-encoding-error.c b/test/Lexer/char-literal-encoding-error.c
new file mode 100644
index 0000000000..08f9a50a0d
--- /dev/null
+++ b/test/Lexer/char-literal-encoding-error.c
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -x c++ %s
+
+// This file is encoded using ISO-8859-1
+
+int main() {
+ 'é'; // expected-error {{illegal sequence in character literal}}
+ u'é'; // expected-error {{illegal sequence in character literal}}
+ U'é'; // expected-error {{illegal sequence in character literal}}
+ L'é'; // expected-error {{illegal sequence in character literal}}
+}
diff --git a/test/Lexer/char-literal.cpp b/test/Lexer/char-literal.cpp
new file mode 100644
index 0000000000..5dc53608f8
--- /dev/null
+++ b/test/Lexer/char-literal.cpp
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++11 -Wfour-char-constants -fsyntax-only -verify %s
+
+int a = 'ab'; // expected-warning {{multi-character character constant}}
+int b = '\xFF\xFF'; // expected-warning {{multi-character character constant}}
+int c = 'APPS'; // expected-warning {{multi-character character constant}}
+
+char d = '⌘'; // expected-error {{character too large for enclosing character literal type}}
+char e = '\u2318'; // expected-error {{character too large for enclosing character literal type}}
+
+auto f = '\xE2\x8C\x98'; // expected-warning {{multi-character character constant}}
+
+char16_t g = u'ab'; // expected-error {{Unicode character literals may not contain multiple characters}}
+char16_t h = u'\U0010FFFD'; // expected-error {{character too large for enclosing character literal type}}
+
+wchar_t i = L'ab'; // expected-warning {{extraneous characters in character constant ignored}}
+wchar_t j = L'\U0010FFFD';
+
+char32_t k = U'\U0010FFFD';
+
+char l = 'Ø'; // expected-error {{character too large for enclosing character literal type}}
+char m = '👿'; // expected-error {{character too large for enclosing character literal type}}
+
+char32_t n = U'ab'; // expected-error {{Unicode character literals may not contain multiple characters}}
+char16_t o = '👽'; // expected-error {{character too large for enclosing character literal type}}
diff --git a/test/Lexer/constants.c b/test/Lexer/constants.c
index 013103b1f5..290388543c 100644
--- a/test/Lexer/constants.c
+++ b/test/Lexer/constants.c
@@ -66,4 +66,4 @@ double t1[] = {
// PR7888
double g = 1e100000000; // expected-warning {{too large}}
-char h = '\u1234'; // expected-warning {{character unicode escape sequence too long for its type}}
+char h = '\u1234'; // expected-error {{character too large for enclosing character literal type}}
diff --git a/test/Lexer/utf8-char-literal.cpp b/test/Lexer/utf8-char-literal.cpp
index c4ea5fc3c3..12b001e4b4 100644
--- a/test/Lexer/utf8-char-literal.cpp
+++ b/test/Lexer/utf8-char-literal.cpp
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 -triple x86_64-apple-darwin -std=c++11 -fsyntax-only -verify %s
-int array0[u'ñ' == u'\xf1'? 1 : -1];
-int array1['ñ' != u'\xf1'? 1 : -1];
+int array0[u'ñ' == u'\xf1'? 1 : -1];
+int array1['\xF1' != u'\xf1'? 1 : -1];
+int array1['ñ' != u'\xf1'? 1 : -1]; // expected-error {{character too large for enclosing character literal type}}
diff --git a/test/Lexer/wchar.c b/test/Lexer/wchar.c
index 648a38ef3f..de00c02f13 100644
--- a/test/Lexer/wchar.c
+++ b/test/Lexer/wchar.c
@@ -1,9 +1,9 @@
// RUN: %clang_cc1 -fsyntax-only -fshort-wchar -verify %s
void f() {
- (void)L"\U00010000"; // expected-warning {{character unicode escape sequence too long for its type}}
+ (void)L"\U00010000"; // unicode escape produces UTF-16 sequence, so no warning
- (void)L'\U00010000'; // expected-warning {{character unicode escape sequence too long for its type}}
+ (void)L'\U00010000'; // expected-error {{character too large for enclosing character literal type}}
(void)L'ab'; // expected-warning {{extraneous characters in character constant ignored}}