diff --git a/gcc/testsuite/g++.dg/cpp1z/utf8-neg.C b/gcc/testsuite/g++.dg/cpp1z/utf8-neg.C index 7bca12489555e1ad6ffd1e3704bf847a68b13d8b..d9e982f5c011006b32835dd7be7155d0ced22cbc 100644 --- a/gcc/testsuite/g++.dg/cpp1z/utf8-neg.C +++ b/gcc/testsuite/g++.dg/cpp1z/utf8-neg.C @@ -1,6 +1,6 @@ /* { dg-do compile { target c++17 } } */ const static char c0 = u8''; // { dg-error "empty character" } -const static char c1 = u8'ab'; // { dg-error "character constant too long for its type" } -const static char c2 = u8'\u0124'; // { dg-error "character constant too long for its type" } -const static char c3 = u8'\U00064321'; // { dg-error "character constant too long for its type" } +const static char c1 = u8'ab'; // { dg-error "multi-character literal cannot have an encoding prefix" } +const static char c2 = u8'\u0124'; // { dg-error "character not encodable in a single code unit" } +const static char c3 = u8'\U00064321'; // { dg-error "character not encodable in a single code unit" } diff --git a/gcc/testsuite/g++.dg/cpp23/wchar-multi1.C b/gcc/testsuite/g++.dg/cpp23/wchar-multi1.C index 77fa3a606dc5448d0a841326c80d292c44562b2f..d86dfc917906077e3da4686f397450eded2c6a87 100644 --- a/gcc/testsuite/g++.dg/cpp23/wchar-multi1.C +++ b/gcc/testsuite/g++.dg/cpp23/wchar-multi1.C @@ -4,18 +4,19 @@ char a = 'a'; int b = 'ab'; // { dg-warning "multi-character character constant" } -int c = '\u05D9'; // { dg-warning "multi-character character constant" } +int c = '\u05D9'; // { dg-error "character not encodable in a single execution character code unit" } #if __SIZEOF_INT__ > 2 -int d = '\U0001F525'; // { dg-warning "multi-character character constant" "" { target int32 } } +int d = '\U0001F525'; // { dg-error "character not encodable in a single execution character code unit" "" { target int32 } } #endif -int e = 'abcd'; // { dg-warning "multi-character character constant" } +int e = 'abcd'; // { dg-warning "multi-character character constant" "" { target int32plus } } + // { dg-warning 
"multi-character literal with \[0-9]+ characters exceeds 'int' size of \[0-9]+ bytes" "" { target { ! int32plus } } .-1 } wchar_t f = L'f'; -wchar_t g = L'gh'; // { dg-error "character constant too long for its type" "" { target c++23 } } - // { dg-warning "character constant too long for its type" "" { target c++20_down } .-1 } -wchar_t h = L'ijkl'; // { dg-error "character constant too long for its type" "" { target c++23 } } - // { dg-warning "character constant too long for its type" "" { target c++20_down } .-1 } -wchar_t i = L'\U0001F525'; // { dg-error "character constant too long for its type" "" { target { c++23 && { ! 4byte_wchar_t } } } } - // { dg-warning "character constant too long for its type" "" { target { c++20_down && { ! 4byte_wchar_t } } } .-1 } +wchar_t g = L'gh'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++23 } } + // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target c++20_down } .-1 } +wchar_t h = L'ijkl'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++23 } } + // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target c++20_down } .-1 } +wchar_t i = L'\U0001F525'; // { dg-error "character not encodable in a single code unit" "" { target { c++23 && { ! 4byte_wchar_t } } } } + // { dg-warning "character not encodable in a single code unit" "" { target { c++20_down && { ! 
4byte_wchar_t } } } .-1 } #ifdef __cpp_char8_t typedef char8_t u8; #else @@ -23,20 +24,20 @@ typedef char u8; #endif #if __cpp_unicode_characters >= 201411 u8 j = u8'j'; -u8 k = u8'kl'; // { dg-error "character constant too long for its type" "" { target c++17 } } -u8 l = u8'\U0001F525'; // { dg-error "character constant too long for its type" "" { target c++17 } } +u8 k = u8'kl'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++17 } } +u8 l = u8'\U0001F525'; // { dg-error "character not encodable in a single code unit" "" { target c++17 } } #endif #if __cpp_unicode_characters >= 200704 char16_t m = u'm'; -char16_t n = u'no'; // { dg-error "character constant too long for its type" "" { target c++11 } } +char16_t n = u'no'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++11 } } char16_t o = u'\u05D9'; -char16_t p = u'\U0001F525'; // { dg-error "character constant too long for its type" "" { target c++11 } } +char16_t p = u'\U0001F525'; // { dg-error "character not encodable in a single code unit" "" { target c++11 } } char32_t q = U'm'; -char32_t r = U'no'; // { dg-error "character constant too long for its type" "" { target c++11 } } +char32_t r = U'no'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++11 } } char32_t s = U'\u05D9'; char32_t t = U'\U0001F525'; #endif -wchar_t u = L'\u0065\u0301'; // { dg-error "character constant too long for its type" "" { target c++23 } } - // { dg-warning "character constant too long for its type" "" { target c++20_down } .-1 } -wchar_t v = L'é'; // { dg-error "character constant too long for its type" "" { target c++23 } } - // { dg-warning "character constant too long for its type" "" { target c++20_down } .-1 } +wchar_t u = L'\u0065\u0301'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++23 } } + // { dg-warning "multi-character literal cannot have an encoding prefix" "" { 
target c++20_down } .-1 } +wchar_t v = L'é'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++23 } } + // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target c++20_down } .-1 } diff --git a/gcc/testsuite/g++.dg/cpp23/wchar-multi2.C b/gcc/testsuite/g++.dg/cpp23/wchar-multi2.C index a63e0fffe6014b94fde25a3985f79822db2ec846..270de65988cb6150c6dccc6878756900cc3eb427 100644 --- a/gcc/testsuite/g++.dg/cpp23/wchar-multi2.C +++ b/gcc/testsuite/g++.dg/cpp23/wchar-multi2.C @@ -11,12 +11,12 @@ int d = '\U0001F525'; // { dg-warning "multi-character character constant" "" { #endif int e = 'abcd'; // { dg-warning "multi-character character constant" } wchar_t f = L'f'; -wchar_t g = L'gh'; // { dg-error "character constant too long for its type" "" { target c++23 } } - // { dg-warning "character constant too long for its type" "" { target c++20_down } .-1 } -wchar_t h = L'ijkl'; // { dg-error "character constant too long for its type" "" { target c++23 } } - // { dg-warning "character constant too long for its type" "" { target c++20_down } .-1 } -wchar_t i = L'\U0001F525'; // { dg-error "character constant too long for its type" "" { target { c++23 } } } - // { dg-warning "character constant too long for its type" "" { target { c++20_down } } .-1 } +wchar_t g = L'gh'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++23 } } + // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target c++20_down } .-1 } +wchar_t h = L'ijkl'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++23 } } + // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target c++20_down } .-1 } +wchar_t i = L'\U0001F525'; // { dg-error "character not encodable in a single code unit" "" { target { c++23 } } } + // { dg-warning "character not encodable in a single code unit" "" { target { c++20_down } } .-1 } #ifdef 
__cpp_char8_t typedef char8_t u8; #else @@ -24,20 +24,20 @@ typedef char u8; #endif #if __cpp_unicode_characters >= 201411 u8 j = u8'j'; -u8 k = u8'kl'; // { dg-error "character constant too long for its type" "" { target c++17 } } -u8 l = u8'\U0001F525'; // { dg-error "character constant too long for its type" "" { target c++17 } } +u8 k = u8'kl'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++17 } } +u8 l = u8'\U0001F525'; // { dg-error "character not encodable in a single code unit" "" { target c++17 } } #endif #if __cpp_unicode_characters >= 200704 char16_t m = u'm'; -char16_t n = u'no'; // { dg-error "character constant too long for its type" "" { target c++11 } } +char16_t n = u'no'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++11 } } char16_t o = u'\u05D9'; -char16_t p = u'\U0001F525'; // { dg-error "character constant too long for its type" "" { target c++11 } } +char16_t p = u'\U0001F525'; // { dg-error "character not encodable in a single code unit" "" { target c++11 } } char32_t q = U'm'; -char32_t r = U'no'; // { dg-error "character constant too long for its type" "" { target c++11 } } +char32_t r = U'no'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++11 } } char32_t s = U'\u05D9'; char32_t t = U'\U0001F525'; #endif -wchar_t u = L'\u0065\u0301'; // { dg-error "character constant too long for its type" "" { target c++23 } } - // { dg-warning "character constant too long for its type" "" { target c++20_down } .-1 } -wchar_t v = L'é'; // { dg-error "character constant too long for its type" "" { target c++23 } } - // { dg-warning "character constant too long for its type" "" { target c++20_down } .-1 } +wchar_t u = L'\u0065\u0301'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++23 } } + // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target c++20_down } .-1 } +wchar_t v 
= L'é'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++23 } } + // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target c++20_down } .-1 } diff --git a/gcc/testsuite/g++.dg/cpp26/literals1.C b/gcc/testsuite/g++.dg/cpp26/literals1.C new file mode 100644 index 0000000000000000000000000000000000000000..d51f2f8373f89c684ccc897dfae0170410f55cbf --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp26/literals1.C @@ -0,0 +1,66 @@ +// C++26 P1854R4 - Making non-encodable string literals ill-formed +// { dg-do compile { target c++11 } } +// { dg-require-effective-target int32 } +// { dg-options "-pedantic-errors -finput-charset=UTF-8 -fexec-charset=UTF-8" } + +int a = 'abcd'; // { dg-warning "multi-character character constant" } +int b = '\x61\x62\x63\x64'; // { dg-warning "multi-character character constant" } +int c = 'á'; // { dg-error "character not encodable in a single execution character code unit" } +int d = '😁'; // { dg-error "character not encodable in a single execution character code unit" } +int e = '\N{FACE WITH TEARS OF JOY}'; // { dg-error "character not encodable in a single execution character code unit" } + // { dg-error "named universal character escapes are only valid in" "" { target c++20_down } .-1 } +int f = '\U0001F602'; // { dg-error "character not encodable in a single execution character code unit" } +wchar_t g = L'abcd'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++23 } } + // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target c++20_down } .-1 } +wchar_t h = L'\x61\x62\x63\x64'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++23 } } + // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target c++20_down } .-1 } +wchar_t i = L'á'; +char16_t j = u'abcd'; // { dg-error "multi-character literal cannot have an encoding prefix" } +char16_t k = 
u'\x61\x62\x63\x64'; // { dg-error "multi-character literal cannot have an encoding prefix" } +char16_t l = u'á'; +char16_t m = u'😁'; // { dg-error "character not encodable in a single code unit" } +char16_t n = u'\N{FACE WITH TEARS OF JOY}'; // { dg-error "character not encodable in a single code unit" { target c++23 } } + // { dg-error "named universal character escapes are only valid in" "" { target c++20_down } .-1 } +char16_t o = u'\U0001F602'; // { dg-error "character not encodable in a single code unit" } +char32_t p = U'abcd'; // { dg-error "multi-character literal cannot have an encoding prefix" } +char32_t q = U'\x61\x62\x63\x64'; // { dg-error "multi-character literal cannot have an encoding prefix" } +char32_t r = U'á'; +char32_t s = U'😁'; +char32_t t = U'\N{FACE WITH TEARS OF JOY}'; // { dg-error "named universal character escapes are only valid in" "" { target c++20_down } } +char32_t u = U'\U0001F602'; +#if __cpp_unicode_characters >= 201411L +auto v = u8'abcd'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++17 } } +auto w = u8'\x61\x62\x63\x64'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++17 } } +auto x = u8'á'; // { dg-error "character not encodable in a single code unit" "" { target c++17 } } +auto y = u8'😁'; // { dg-error "character not encodable in a single code unit" "" { target c++17 } } +auto z = u8'\N{FACE WITH TEARS OF JOY}'; // { dg-error "character not encodable in a single code unit" "" { target c++17 } } + // { dg-error "named universal character escapes are only valid in" "" { target { c++17 && c++20_down } } .-1 } +auto aa = u8'\U0001F602'; // { dg-error "character not encodable in a single code unit" "" { target c++17 } } +#endif +const char *ab = "😁"; +const char *ac = "\N{FACE WITH TEARS OF JOY}"; // { dg-error "named universal character escapes are only valid in" "" { target c++20_down } } +const char *ad = "\U0001F602"; +const char16_t *ae = 
u"😁"; +const char16_t *af = u"\N{FACE WITH TEARS OF JOY}"; // { dg-error "named universal character escapes are only valid in" "" { target c++20_down } } +const char16_t *ag = u"\U0001F602"; +const char32_t *ah = U"😁"; +const char32_t *ai = U"\N{FACE WITH TEARS OF JOY}"; // { dg-error "named universal character escapes are only valid in" "" { target c++20_down } } +const char32_t *aj = U"\U0001F602"; +auto ak = u8"😁"; +auto al = u8"\N{FACE WITH TEARS OF JOY}"; // { dg-error "named universal character escapes are only valid in" "" { target c++20_down } } +auto am = u8"\U0001F602"; +int an = '\x123456789'; // { dg-error "hex escape sequence out of range" } +wchar_t ao = L'\x123456789abcdef0'; // { dg-error "hex escape sequence out of range" } +char16_t ap = u'\x12345678'; // { dg-error "hex escape sequence out of range" } +char32_t aq = U'\x123456789abcdef0'; // { dg-error "hex escape sequence out of range" } +#if __cpp_unicode_characters >= 201411L +auto ar = u8'\x123456789abcdef0'; // { dg-error "hex escape sequence out of range" "" { target c++17 } } +#endif +char as = '\xff'; +#if __SIZEOF_WCHAR_T__ * __CHAR_BIT__ == 32 +wchar_t at = L'\xffffffff'; +#elif __SIZEOF_WCHAR_T__ * __CHAR_BIT__ == 16 +wchar_t at = L'\xffff'; +#endif +int au = '\x1234'; // { dg-error "hex escape sequence out of range" } +int av = 'abcdefghijklmnop'; // { dg-warning "multi-character literal with \[0-9]+ characters exceeds 'int' size of \[0-9]+ bytes" } diff --git a/gcc/testsuite/g++.dg/cpp26/literals2.C b/gcc/testsuite/g++.dg/cpp26/literals2.C new file mode 100644 index 0000000000000000000000000000000000000000..11e4406f1114b7d022fd1ce40928312b0bd5e545 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp26/literals2.C @@ -0,0 +1,68 @@ +// C++26 P1854R4 - Making non-encodable string literals ill-formed +// { dg-do compile { target c++11 } } +// { dg-require-effective-target int32 } +// { dg-options "-pedantic-errors -finput-charset=UTF-8 -fexec-charset=ISO-8859-1" } +/* { dg-require-iconv 
"ISO-8859-1" } */ + +int a = 'abcd'; // { dg-warning "multi-character character constant" } +int b = '\x61\x62\x63\x64'; // { dg-warning "multi-character character constant" } +int c = 'á'; +int d = '😁'; // { dg-error "converting to execution character set" } +int e = '\N{FACE WITH TEARS OF JOY}'; // { dg-error "converting UCN to execution character set" } + // { dg-error "named universal character escapes are only valid in" "" { target c++20_down } .-1 } +int f = '\U0001F602'; // { dg-error "converting UCN to execution character set" } +wchar_t g = L'abcd'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++23 } } + // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target c++20_down } .-1 } +wchar_t h = L'\x61\x62\x63\x64'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++23 } } + // { dg-warning "multi-character literal cannot have an encoding prefix" "" { target c++20_down } .-1 } +wchar_t i = L'á'; +char16_t j = u'abcd'; // { dg-error "multi-character literal cannot have an encoding prefix" } +char16_t k = u'\x61\x62\x63\x64'; // { dg-error "multi-character literal cannot have an encoding prefix" } +char16_t l = u'á'; +char16_t m = u'😁'; // { dg-error "character not encodable in a single code unit" } +char16_t n = u'\N{FACE WITH TEARS OF JOY}'; // { dg-error "character not encodable in a single code unit" { target c++23 } } + // { dg-error "named universal character escapes are only valid in" "" { target c++20_down } .-1 } +char16_t o = u'\U0001F602'; // { dg-error "character not encodable in a single code unit" } +char32_t p = U'abcd'; // { dg-error "multi-character literal cannot have an encoding prefix" } +char32_t q = U'\x61\x62\x63\x64'; // { dg-error "multi-character literal cannot have an encoding prefix" } +char32_t r = U'á'; +char32_t s = U'😁'; +char32_t t = U'\N{FACE WITH TEARS OF JOY}'; // { dg-error "named universal character escapes are only 
valid in" "" { target c++20_down } } +char32_t u = U'\U0001F602'; +#if __cpp_unicode_characters >= 201411L +auto v = u8'abcd'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++17 } } +auto w = u8'\x61\x62\x63\x64'; // { dg-error "multi-character literal cannot have an encoding prefix" "" { target c++17 } } +auto x = u8'á'; // { dg-error "character not encodable in a single code unit" "" { target c++17 } } +auto y = u8'😁'; // { dg-error "character not encodable in a single code unit" "" { target c++17 } } +auto z = u8'\N{FACE WITH TEARS OF JOY}'; // { dg-error "character not encodable in a single code unit" "" { target c++17 } } + // { dg-error "named universal character escapes are only valid in" "" { target { c++17 && c++20_down } } .-1 } +auto aa = u8'\U0001F602'; // { dg-error "character not encodable in a single code unit" "" { target c++17 } } +#endif +const char *ab = "😁"; // { dg-error "converting to execution character set" } +const char *ac = "\N{FACE WITH TEARS OF JOY}"; // { dg-error "converting UCN to execution character set" } + // { dg-error "named universal character escapes are only valid in" "" { target c++20_down } .-1 } +const char *ad = "\U0001F602"; // { dg-error "converting UCN to execution character set" } +const char16_t *ae = u"😁"; +const char16_t *af = u"\N{FACE WITH TEARS OF JOY}"; // { dg-error "named universal character escapes are only valid in" "" { target c++20_down } } +const char16_t *ag = u"\U0001F602"; +const char32_t *ah = U"😁"; +const char32_t *ai = U"\N{FACE WITH TEARS OF JOY}"; // { dg-error "named universal character escapes are only valid in" "" { target c++20_down } } +const char32_t *aj = U"\U0001F602"; +auto ak = u8"😁"; +auto al = u8"\N{FACE WITH TEARS OF JOY}"; // { dg-error "named universal character escapes are only valid in" "" { target c++20_down } } +auto am = u8"\U0001F602"; +int an = '\x123456789'; // { dg-error "hex escape sequence out of range" } +wchar_t ao = 
L'\x123456789abcdef0'; // { dg-error "hex escape sequence out of range" } +char16_t ap = u'\x12345678'; // { dg-error "hex escape sequence out of range" } +char32_t aq = U'\x123456789abcdef0'; // { dg-error "hex escape sequence out of range" } +#if __cpp_unicode_characters >= 201411L +auto ar = u8'\x123456789abcdef0'; // { dg-error "hex escape sequence out of range" "" { target c++17 } } +#endif +char as = '\xff'; +#if __SIZEOF_WCHAR_T__ * __CHAR_BIT__ == 32 +wchar_t at = L'\xffffffff'; +#elif __SIZEOF_WCHAR_T__ * __CHAR_BIT__ == 16 +wchar_t at = L'\xffff'; +#endif +int au = '\x1234'; // { dg-error "hex escape sequence out of range" } +int av = 'abcdefghijklmnop'; // { dg-warning "multi-character literal with \[0-9]+ characters exceeds 'int' size of \[0-9]+ bytes" } diff --git a/gcc/testsuite/g++.dg/cpp2a/ucn2.C b/gcc/testsuite/g++.dg/cpp2a/ucn2.C index ee7011b4a3b7163b0aff276101ef0f338d0bbff6..53ee06e897135ebd568234e4d341abf49549d3ba 100644 --- a/gcc/testsuite/g++.dg/cpp2a/ucn2.C +++ b/gcc/testsuite/g++.dg/cpp2a/ucn2.C @@ -12,18 +12,18 @@ const char32_t *f = U"\uD802"; // { dg-error "is not a valid universal characte const char32_t *g = U"\U0000DFF0"; // { dg-error "is not a valid universal character" } const char32_t *h = U"\U00110001"; // { dg-error "is outside the UCS codespace" "" { target c++20 } } #if __cpp_unicode_characters >= 201411 -const char8_t i = u8'\u00C0'; // { dg-error "character constant too long for its type" "" { target c++17 } } +const char8_t i = u8'\u00C0'; // { dg-error "character not encodable in a single code unit" "" { target c++17 } } #endif -const char16_t j = u'\U0001F914'; // { dg-error "character constant too long for its type" } +const char16_t j = u'\U0001F914'; // { dg-error "character not encodable in a single code unit" } const char32_t k = U'\U0001F914'; #if __cpp_unicode_characters >= 201411 -const char8_t l = u8'ab'; // { dg-error "character constant too long for its type" "" { target c++17 } } +const char8_t l = u8'ab'; // 
{ dg-error "multi-character literal cannot have an encoding prefix" "" { target c++17 } } #endif -const char16_t m = u'ab'; // { dg-error "character constant too long for its type" } -const char32_t n = U'ab'; // { dg-error "character constant too long for its type" } +const char16_t m = u'ab'; // { dg-error "multi-character literal cannot have an encoding prefix" } +const char32_t n = U'ab'; // { dg-error "multi-character literal cannot have an encoding prefix" } #if __cpp_unicode_characters >= 201411 const char8_t o = u8'\U00110002'; // { dg-error "is outside the UCS codespace" "" { target c++20 } } - // { dg-error "character constant too long for its type" "" { target c++17 } .-1 } + // { dg-error "character not encodable in a single code unit" "" { target c++17 } .-1 } #endif const char16_t p = u'\U00110003'; // { dg-error "is outside the UCS codespace" "" { target c++20 } } // { dg-error "converting UCN to execution character set" "" { target *-*-* } .-1 } diff --git a/gcc/testsuite/g++.dg/ext/utf16-4.C b/gcc/testsuite/g++.dg/ext/utf16-4.C index 030e085a82e491e8f99c169aba243c75160e2155..feb81667b7b5986ca44cc954fede6911eb45c98e 100644 --- a/gcc/testsuite/g++.dg/ext/utf16-4.C +++ b/gcc/testsuite/g++.dg/ext/utf16-4.C @@ -4,8 +4,8 @@ const static char16_t c0 = u''; /* { dg-error "empty character" } */ -const static char16_t c1 = u'ab'; /* { dg-error "constant too long" } */ -const static char16_t c2 = u'\U00064321'; /* { dg-error "constant too long" } */ +const static char16_t c1 = u'ab'; /* { dg-error "multi-character literal cannot have an encoding prefix" } */ +const static char16_t c2 = u'\U00064321'; /* { dg-error "character not encodable in a single code unit" } */ const static char16_t c3 = 'a'; const static char16_t c4 = U'a'; @@ -14,5 +14,6 @@ const static char16_t c6 = U'\U00064321'; /* { dg-warning "conversion from .char const static char16_t c7 = L'a'; const static char16_t c8 = L'\u2029'; const static char16_t c9 = L'\U00064321'; /* { dg-warning 
"conversion from .wchar_t. to .char16_t. changes value from .410401. to .17185." "" { target { 4byte_wchar_t } } } */ - /* { dg-warning "constant too long" "" { target { ! 4byte_wchar_t } } .-1 } */ + /* { dg-warning "character not encodable in a single code unit" "" { target { { ! 4byte_wchar_t } && c++20_down } } .-1 } */ + /* { dg-error "character not encodable in a single code unit" "" { target { { ! 4byte_wchar_t } && c++23 } } .-2 } */ int main () {} diff --git a/gcc/testsuite/g++.dg/ext/utf32-4.C b/gcc/testsuite/g++.dg/ext/utf32-4.C index 96bf0bb2b4a4ec33fe089c6e62e1621bca9c26a9..8310bf4055c46d4f248ab9afa97a521345014a34 100644 --- a/gcc/testsuite/g++.dg/ext/utf32-4.C +++ b/gcc/testsuite/g++.dg/ext/utf32-4.C @@ -3,15 +3,16 @@ /* { dg-do compile { target c++11 } } */ const static char32_t c0 = U''; /* { dg-error "empty character" } */ -const static char32_t c1 = U'ab'; /* { dg-error "constant too long" } */ +const static char32_t c1 = U'ab'; /* { dg-error "multi-character literal cannot have an encoding prefix" } */ const static char32_t c2 = U'\U00064321'; const static char32_t c3 = 'a'; const static char32_t c4 = u'a'; const static char32_t c5 = u'\u2029'; -const static char32_t c6 = u'\U00064321'; /* { dg-error "constant too long" } */ +const static char32_t c6 = u'\U00064321'; /* { dg-error "character not encodable in a single code unit" } */ const static char32_t c7 = L'a'; const static char32_t c8 = L'\u2029'; -const static char32_t c9 = L'\U00064321'; /* { dg-warning "constant too long" "" { target { ! 4byte_wchar_t } } } */ +const static char32_t c9 = L'\U00064321'; /* { dg-warning "character not encodable in a single code unit" "" { target { { ! 4byte_wchar_t } && c++20_down } } } */ + /* { dg-error "character not encodable in a single code unit" "" { target { { ! 
4byte_wchar_t } && c++23 } } .-1 } */ int main () {} diff --git a/gcc/testsuite/gcc.dg/c23-utf8char-3.c b/gcc/testsuite/gcc.dg/c23-utf8char-3.c index e152edbed84522a8c7c17785ef6d9307c684cf0d..0d82af52f0513f28e13986a24088d1f52db626af 100644 --- a/gcc/testsuite/gcc.dg/c23-utf8char-3.c +++ b/gcc/testsuite/gcc.dg/c23-utf8char-3.c @@ -3,6 +3,6 @@ /* { dg-options "-std=c23 -pedantic-errors" } */ unsigned char a = u8''; /* { dg-error "empty character constant" } */ -unsigned char b = u8'ab'; /* { dg-error "character constant too long for its type" } */ -unsigned char c = u8'\u00ff'; /* { dg-error "character constant too long for its type" } */ +unsigned char b = u8'ab'; /* { dg-error "multi-character literal cannot have an encoding prefix" } */ +unsigned char c = u8'\u00ff'; /* { dg-error "character not encodable in a single code unit" } */ unsigned char d = u8'\x100'; /* { dg-error "hex escape sequence out of range" } */ diff --git a/gcc/testsuite/gcc.dg/cpp/charconst-4.c b/gcc/testsuite/gcc.dg/cpp/charconst-4.c index 9ea5e8ab89692489ffdb67fce16ecae22dcc5c6c..03706c5dac2e84ac76811b871f8be4f90db2e8a6 100644 --- a/gcc/testsuite/gcc.dg/cpp/charconst-4.c +++ b/gcc/testsuite/gcc.dg/cpp/charconst-4.c @@ -38,7 +38,7 @@ extern void abort (void); # error Charconst incorrectly sign-extended #endif -#if LONG_CHARCONST != SHORT_CHARCONST /* { dg-warning "too long" } */ +#if LONG_CHARCONST != SHORT_CHARCONST /* { dg-warning "multi-character literal with \[0-9]+ characters exceeds 'int' size of \[0-9]+ bytes" } */ # error Overly long charconst truncates wrongly for preprocessor #endif @@ -46,7 +46,7 @@ int main () { if (POS_CHARCONST < 0) abort (); - if (LONG_CHARCONST != SHORT_CHARCONST) /* { dg-warning "too long" } */ + if (LONG_CHARCONST != SHORT_CHARCONST) /* { dg-warning "multi-character literal with \[0-9]+ characters exceeds 'int' size of \[0-9]+ bytes" } */ abort (); return 0; } diff --git a/gcc/testsuite/gcc.dg/cpp/charconst.c b/gcc/testsuite/gcc.dg/cpp/charconst.c index 
8934d6a6721db7a131b5367589c9d40cb788d4b3..a2a5717c663b63954f4910721dd349909b9c75bf 100644 --- a/gcc/testsuite/gcc.dg/cpp/charconst.c +++ b/gcc/testsuite/gcc.dg/cpp/charconst.c @@ -11,9 +11,9 @@ #endif #if L'' /* { dg-error "empty" "empty wide charconst" } */ #endif -#if 'very long' /* { dg-warning "too long" "long charconst" } */ +#if 'very long' /* { dg-warning "multi-character literal with \[0-9]+ characters exceeds 'int' size of \[0-9]+ bytes" "long charconst" } */ #endif -#if L'very long' /* { dg-warning "too long" "long wide charconst" } */ +#if L'very long' /* { dg-warning "multi-character literal cannot have an encoding prefix" "long wide charconst" } */ #endif /* Don't do this test for L'ab'; it depends upon sizeof (wchar_t). */ #if 'ab' /* { dg-warning "multi-char" "multi-character" } */ @@ -27,10 +27,10 @@ void foo () c = ''; /* { dg-error "empty" "empty charconst" } */ w = L''; /* { dg-error "empty" "empty wide charconst" } */ - c = 'very long'; /* { dg-warning "too long" "long charconst" } */ - w = L'very long'; /* { dg-warning "too long" "long wide charconst" } */ + c = 'very long'; /* { dg-warning "multi-character literal with \[0-9]+ characters exceeds 'int' size of \[0-9]+ bytes" "long charconst" } */ + w = L'very long'; /* { dg-warning "multi-character literal cannot have an encoding prefix" "long wide charconst" } */ c = 'ab'; /* { dg-warning "multi-char" "multi-char" } */ /* Wide charconsts cannot contain more than one wide character. 
*/ - w = L'ab'; /* { dg-warning "too long" "multi-char wide" } */ + w = L'ab'; /* { dg-warning "multi-character literal cannot have an encoding prefix" "multi-char wide" } */ } diff --git a/gcc/testsuite/gcc.dg/cpp/if-2.c b/gcc/testsuite/gcc.dg/cpp/if-2.c index dc136b4563446a5cd11411b0a09520f3d47b3f13..e969aa0b473bb89e1061c10c23993851e285cd97 100644 --- a/gcc/testsuite/gcc.dg/cpp/if-2.c +++ b/gcc/testsuite/gcc.dg/cpp/if-2.c @@ -21,7 +21,7 @@ #if 'abcd' /* { dg-warning "(multi-character character constant)|(character constant (is )?too long)" "multi-character charconst" } */ #endif -#if 'abcdefghi' /* { dg-warning "character constant (is )?too long" "charconst too long" } */ +#if 'abcdefghi' /* { dg-warning "multi-character literal with \[0-9]+ characters exceeds 'int' size of \[0-9]+ bytes" } */ #endif #if '' /* { dg-error "empty character constant" "empty charconst" } */ diff --git a/gcc/testsuite/gcc.dg/utf16-4.c b/gcc/testsuite/gcc.dg/utf16-4.c index 60e00f60ee4d3f90e907233629351d46f0fff142..e108d00afdfd98ac1329191b4ac0a54167db7f00 100644 --- a/gcc/testsuite/gcc.dg/utf16-4.c +++ b/gcc/testsuite/gcc.dg/utf16-4.c @@ -6,8 +6,8 @@ typedef __CHAR16_TYPE__ char16_t; char16_t c0 = u''; /* { dg-error "empty character" } */ -char16_t c1 = u'ab'; /* { dg-warning "constant too long" } */ -char16_t c2 = u'\U00064321'; /* { dg-warning "constant too long" } */ +char16_t c1 = u'ab'; /* { dg-warning "multi-character literal cannot have an encoding prefix" } */ +char16_t c2 = u'\U00064321'; /* { dg-warning "character not encodable in a single code unit" } */ char16_t c3 = 'a'; char16_t c4 = U'a'; @@ -16,6 +16,6 @@ char16_t c6 = U'\U00064321'; /* { dg-warning "conversion from .(long )?unsigned char16_t c7 = L'a'; char16_t c8 = L'\u2029'; char16_t c9 = L'\U00064321'; /* { dg-warning "conversion" "" { target { 4byte_wchar_t } } } */ - /* { dg-warning "constant too long" "" { target { ! 
4byte_wchar_t } } .-1 } */ + /* { dg-warning "character not encodable in a single code unit" "" { target { ! 4byte_wchar_t } } .-1 } */ int main () {} diff --git a/gcc/testsuite/gcc.dg/utf32-4.c b/gcc/testsuite/gcc.dg/utf32-4.c index aa7f66a36851761f15a7163c7566341f3261f1c1..72086bc7c6a111a85e1b0d14c600bb487c11d0a5 100644 --- a/gcc/testsuite/gcc.dg/utf32-4.c +++ b/gcc/testsuite/gcc.dg/utf32-4.c @@ -6,15 +6,15 @@ typedef __CHAR32_TYPE__ char32_t; char32_t c0 = U''; /* { dg-error "empty character" } */ -char32_t c1 = U'ab'; /* { dg-warning "constant too long" } */ +char32_t c1 = U'ab'; /* { dg-warning "multi-character literal cannot have an encoding prefix" } */ char32_t c2 = U'\U00064321'; char32_t c3 = 'a'; char32_t c4 = u'a'; char32_t c5 = u'\u2029'; -char32_t c6 = u'\U00064321'; /* { dg-warning "constant too long" } */ +char32_t c6 = u'\U00064321'; /* { dg-warning "character not encodable in a single code unit" } */ char32_t c7 = L'a'; char32_t c8 = L'\u2029'; -char32_t c9 = L'\U00064321'; /* { dg-warning "constant too long" "" { target { ! 4byte_wchar_t } } } */ +char32_t c9 = L'\U00064321'; /* { dg-warning "character not encodable in a single code unit" "" { target { ! 4byte_wchar_t } } } */ int main () {} diff --git a/libcpp/charset.cc b/libcpp/charset.cc index d5a027502cd6980831cb9a64a62b0b3c841c63ac..9a944d94360cadc636f1dadd55be766b117fffea 100644 --- a/libcpp/charset.cc +++ b/libcpp/charset.cc @@ -446,6 +446,73 @@ one_utf16_to_utf8 (iconv_t bigend, const uchar **inbufp, size_t *inbytesleftp, return 0; } + +/* Counting routine: each call takes one source character from *INBUFP and + stores one uchar (its value does not matter) to *OUTBUFP, using one byte of + output space. Returns 0, E2BIG if the output is full, or an input error. */ + +static inline int +one_count_chars (iconv_t, const uchar **inbufp, size_t *inbytesleftp, + uchar **outbufp, size_t *outbytesleftp) +{ + cppchar_t s = 0; + int rval; + + /* Check for space first, since we know exactly how much we need. 
*/ + if (*outbytesleftp < 1) + return E2BIG; + +#if HOST_CHARSET == HOST_CHARSET_ASCII + rval = one_utf8_to_cppchar (inbufp, inbytesleftp, &s); + if (rval) + return rval; +#else + if (*inbytesleftp < 1) + return EINVAL; + static const uchar utf_ebcdic_map[256] = { + /* See table 4 in http://unicode.org/reports/tr16/tr16-7.2.html -- as used below, 0 and 1 are consumed as one byte, 2-7 give the byte length of a sequence, and 9 marks a byte that may only follow the first byte of a sequence. */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1, 1, 1, 1, 1, + 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1, 1, 1, 1, 1, 1, + 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1, 1, 1, 1, 1, + 9, 9, 9, 9, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, + 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, + 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, + 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 1, 3, 3, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 4, 4, 4, 4, + 1, 4, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 4, 5, 5, 5, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 6, 6, 7, 7, 0 + }; + rval = utf_ebcdic_map[**inbufp]; + if (rval == 9) + return EILSEQ; + if (rval == 0) + rval = 1; + if (rval >= 2) + { + if (*inbytesleftp < rval) + return EINVAL; + for (int i = 1; i < rval; ++i) + if (utf_ebcdic_map[(*inbufp)[i]] != 9) + return EILSEQ; + } + *inbytesleftp -= rval; + *inbufp += rval; +#endif + + **outbufp = ' '; + + *outbufp += 1; + *outbytesleftp -= 1; + return 0; +} + + /* Helper routine for the next few functions. The 'const' on one_conversion means that we promise not to modify what function is pointed to, which lets the inliner see through it. */ @@ -529,6 +596,15 @@ convert_utf32_utf8 (iconv_t cd, const uchar *from, size_t flen, return conversion_loop (one_utf32_to_utf8, cd, from, flen, to); } +/* Magic conversion which just counts characters from input, so + only to->len is significant. 
*/ +static bool +convert_count_chars (iconv_t cd, const uchar *from, + size_t flen, struct _cpp_strbuf *to) +{ + return conversion_loop (one_count_chars, cd, from, flen, to); +} + /* Identity conversion, used when we have no alternative. */ static bool convert_no_conversion (iconv_t cd ATTRIBUTE_UNUSED, @@ -2574,21 +2650,49 @@ cpp_interpret_string_notranslate (cpp_reader *pfile, const cpp_string *from, } +/* Return number of source characters in STR (a literal of kind TYPE): the length cpp_interpret_string yields while convert_count_chars is the narrow converter and noop_diagnostic_cb the diagnostic callback, or 0 if it fails. */ +static unsigned +count_source_chars (cpp_reader *pfile, cpp_string str, cpp_ttype type) +{ + cpp_string str2 = { 0, 0 }; + bool (*saved_diagnostic_handler) (cpp_reader *, enum cpp_diagnostic_level, + enum cpp_warning_reason, rich_location *, + const char *, va_list *) + ATTRIBUTE_FPTR_PRINTF(5,0); + saved_diagnostic_handler = pfile->cb.diagnostic; + pfile->cb.diagnostic = noop_diagnostic_cb; + convert_f save_func = pfile->narrow_cset_desc.func; + pfile->narrow_cset_desc.func = convert_count_chars; + bool ret = cpp_interpret_string (pfile, &str, 1, &str2, type); + pfile->narrow_cset_desc.func = save_func; + pfile->cb.diagnostic = saved_diagnostic_handler; + if (ret) + { + if (str2.text != str.text) + free ((void *)str2.text); + return str2.len; + } + else + return 0; +} + /* Subroutine of cpp_interpret_charconst which performs the conversion to a number, for narrow strings. STR is the string structure returned by cpp_interpret_string. PCHARS_SEEN and UNSIGNEDP are as for - cpp_interpret_charconst. TYPE is the token type. */ + cpp_interpret_charconst. TOKEN is the token. 
*/ static cppchar_t narrow_str_to_charconst (cpp_reader *pfile, cpp_string str, unsigned int *pchars_seen, int *unsignedp, - enum cpp_ttype type) + const cpp_token *token) { + enum cpp_ttype type = token->type; size_t width = CPP_OPTION (pfile, char_precision); size_t max_chars = CPP_OPTION (pfile, int_precision) / width; size_t mask = width_to_mask (width); size_t i; cppchar_t result, c; bool unsigned_p; + bool diagnosed = false; /* The value of a multi-character character constant, or a single-character character constant whose representation in the @@ -2612,11 +2716,55 @@ narrow_str_to_charconst (cpp_reader *pfile, cpp_string str, if (type == CPP_UTF8CHAR) max_chars = 1; - if (i > max_chars) + else if (i > 1 && CPP_OPTION (pfile, cplusplus) && CPP_PEDANTIC (pfile)) { + /* As a DR, C++ paper + P1854R4 - Making non-encodable string literals ill-formed + makes a multi-character narrow character literal ill-formed if any of its + characters is not encodable in a single char/unsigned char. Count the + source c-chars and pedwarn when str.len exceeds that count. NOTE(review): + src_chars <= 2 selects the single-character wording, so the count presumably includes a terminator; confirm. */ + unsigned src_chars = count_source_chars (pfile, token->val.str, type); + + if (src_chars) + { + if (str.len > src_chars) + { + if (src_chars <= 2) + diagnosed + = cpp_error (pfile, CPP_DL_PEDWARN, + "character not encodable in a single execution " + "character code unit"); + else + diagnosed + = cpp_error (pfile, CPP_DL_PEDWARN, + "at least one character in a multi-character " + "literal not encodable in a single execution " + "character code unit"); + if (diagnosed && i > max_chars) + i = max_chars; + } + } + } + if (diagnosed) + /* Already diagnosed above. */; + else if (i > max_chars) + { + unsigned src_chars + = count_source_chars (pfile, token->val.str, + type == CPP_UTF8CHAR ? 
CPP_CHAR : type); + + if (type != CPP_UTF8CHAR) + cpp_error (pfile, CPP_DL_WARNING, + "multi-character literal with %ld characters exceeds " + "'int' size of %ld bytes", (long) i, (long) max_chars); + else if (src_chars > 2) + cpp_error (pfile, CPP_DL_ERROR, + "multi-character literal cannot have an encoding prefix"); + else + cpp_error (pfile, CPP_DL_ERROR, + "character not encodable in a single code unit"); i = max_chars; - cpp_error (pfile, type == CPP_UTF8CHAR ? CPP_DL_ERROR : CPP_DL_WARNING, - "character constant too long for its type"); } else if (i > 1 && CPP_OPTION (pfile, warn_multichar)) cpp_warning (pfile, CPP_W_MULTICHAR, "multi-character character constant"); @@ -2651,12 +2799,13 @@ narrow_str_to_charconst (cpp_reader *pfile, cpp_string str, /* Subroutine of cpp_interpret_charconst which performs the conversion to a number, for wide strings. STR is the string structure returned by cpp_interpret_string. PCHARS_SEEN and UNSIGNEDP are as for - cpp_interpret_charconst. TYPE is the token type. */ + cpp_interpret_charconst. TOKEN is the token. */ static cppchar_t wide_str_to_charconst (cpp_reader *pfile, cpp_string str, unsigned int *pchars_seen, int *unsignedp, - enum cpp_ttype type) + const cpp_token *token) { + enum cpp_ttype type = token->type; bool bigend = CPP_OPTION (pfile, bytes_big_endian); size_t width = converter_for_type (pfile, type).width; size_t cwidth = CPP_OPTION (pfile, char_precision); @@ -2692,14 +2841,25 @@ wide_str_to_charconst (cpp_reader *pfile, cpp_string str, character exactly fills a wchar_t, so a multi-character wide character constant is guaranteed to overflow. */ if (str.len > nbwc * 2) - cpp_error (pfile, (CPP_OPTION (pfile, cplusplus) - && (type == CPP_CHAR16 - || type == CPP_CHAR32 - /* In C++23 this is error even for L'ab'. */ - || (type == CPP_WCHAR - && CPP_OPTION (pfile, size_t_literals)))) - ? 
CPP_DL_ERROR : CPP_DL_WARNING, - "character constant too long for its type"); + { + cpp_diagnostic_level level = CPP_DL_WARNING; + unsigned src_chars + = count_source_chars (pfile, token->val.str, CPP_CHAR); + + if (CPP_OPTION (pfile, cplusplus) + && (type == CPP_CHAR16 + || type == CPP_CHAR32 + /* In C++23 this is an error even for L'ab' (checked here via the size_t_literals option). */ + || (type == CPP_WCHAR + && CPP_OPTION (pfile, size_t_literals)))) + level = CPP_DL_ERROR; + if (src_chars > 2) + cpp_error (pfile, level, + "multi-character literal cannot have an encoding prefix"); + else + cpp_error (pfile, level, + "character not encodable in a single code unit"); + } /* Truncate the constant to its natural width, and simultaneously sign- or zero-extend to the full width of cppchar_t. */ @@ -2754,10 +2914,10 @@ cpp_interpret_charconst (cpp_reader *pfile, const cpp_token *token, if (wide) result = wide_str_to_charconst (pfile, str, pchars_seen, unsignedp, - token->type); + token); else result = narrow_str_to_charconst (pfile, str, pchars_seen, unsignedp, - token->type); + token); if (str.text != token->val.str.text) free ((void *)str.text);