diff --git a/gcc/c-family/c-lex.cc b/gcc/c-family/c-lex.cc index 8bfa4f4024fff267de283876cf7bcd4b354b0349..0b6f94e18a82d10b0cd2a5aff3cb8c1741e18838 100644 --- a/gcc/c-family/c-lex.cc +++ b/gcc/c-family/c-lex.cc @@ -1352,7 +1352,14 @@ lex_string (const cpp_token *tok, tree *valp, bool objc_string, bool translate) default: case CPP_STRING: case CPP_UTF8STRING: - value = build_string (1, ""); + if (type == CPP_UTF8STRING && flag_char8_t) + { + value = build_string (TYPE_PRECISION (char8_type_node) + / TYPE_PRECISION (char_type_node), + ""); /* char8_t is 8 bits */ + } + else + value = build_string (1, ""); break; case CPP_STRING16: value = build_string (TYPE_PRECISION (char16_type_node) @@ -1425,9 +1432,7 @@ lex_charconst (const cpp_token *token) type = char16_type_node; else if (token->type == CPP_UTF8CHAR) { - if (!c_dialect_cxx ()) - type = unsigned_char_type_node; - else if (flag_char8_t) + if (flag_char8_t) type = char8_type_node; else type = char_type_node; diff --git a/gcc/c-family/c-opts.cc b/gcc/c-family/c-opts.cc index 4e1463689de33f7b5ddc4e9e11f2fc5b1bfa3fae..1cf119a9becf8453ffafa1498384d3ff658d9459 100644 --- a/gcc/c-family/c-opts.cc +++ b/gcc/c-family/c-opts.cc @@ -1059,9 +1059,9 @@ c_common_post_options (const char **pfilename) if (flag_sized_deallocation == -1) flag_sized_deallocation = (cxx_dialect >= cxx14); - /* char8_t support is new in C++20. */ + /* char8_t support is implicitly enabled in C++20 and C2X. */ if (flag_char8_t == -1) - flag_char8_t = (cxx_dialect >= cxx20); + flag_char8_t = (cxx_dialect >= cxx20) || flag_isoc2x; if (flag_extern_tls_init) { diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc index 92049d1a101da86db8b000ac676b30c63e5947b3..fa9395986deee5ff8edde8fd79e87952a041e644 100644 --- a/gcc/c/c-parser.cc +++ b/gcc/c/c-parser.cc @@ -7447,7 +7447,14 @@ c_parser_string_literal (c_parser *parser, bool translate, bool wide_ok) default: case CPP_STRING: case CPP_UTF8STRING: - value = build_string (1, ""); + if (type == CPP_UTF8STRING && flag_char8_t) + { + value = build_string (TYPE_PRECISION (char8_type_node) + / TYPE_PRECISION (char_type_node), + ""); /* char8_t is 8 bits */ + } + else + value = build_string (1, ""); break; case CPP_STRING16: value = build_string (TYPE_PRECISION (char16_type_node) @@ -7472,9 +7479,14 @@ c_parser_string_literal (c_parser *parser, bool translate, bool wide_ok) { default: case CPP_STRING: - case CPP_UTF8STRING: TREE_TYPE (value) = char_array_type_node; break; + case CPP_UTF8STRING: + if (flag_char8_t) + TREE_TYPE (value) = char8_array_type_node; + else + TREE_TYPE (value) = char_array_type_node; + break; case CPP_STRING16: TREE_TYPE (value) = char16_array_type_node; break; diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc index 8514488b7a55e0b666732ce1cddf44b500117748..d37de2a313ba434f56600b935bec922bf397a083 100644 --- a/gcc/c/c-typeck.cc +++ b/gcc/c/c-typeck.cc @@ -8056,7 +8056,7 @@ digest_init (location_t init_loc, tree type, tree init, tree origtype, if (char_array) { - if (typ2 != char_type_node) + if (typ2 != char_type_node && typ2 != char8_type_node) incompat_string_cst = true; } else if (!comptypes (typ1, typ2)) diff --git a/gcc/ginclude/stdatomic.h b/gcc/ginclude/stdatomic.h index bfcfdf664c7f819873ffa9615ffc5597c3be38b6..9f2475b739d1061c8bc27419bb9f611979992430 100644 --- a/gcc/ginclude/stdatomic.h +++ b/gcc/ginclude/stdatomic.h @@ -49,6 +49,9 @@ typedef _Atomic long atomic_long; typedef _Atomic unsigned long atomic_ulong; typedef _Atomic long long atomic_llong; typedef _Atomic unsigned long long atomic_ullong; +#ifdef __CHAR8_TYPE__ +typedef _Atomic __CHAR8_TYPE__ atomic_char8_t; +#endif typedef _Atomic __CHAR16_TYPE__ atomic_char16_t; typedef _Atomic __CHAR32_TYPE__ atomic_char32_t; typedef _Atomic __WCHAR_TYPE__ atomic_wchar_t; @@ -97,6 +100,9 @@ extern void atomic_signal_fence (memory_order); #define ATOMIC_BOOL_LOCK_FREE __GCC_ATOMIC_BOOL_LOCK_FREE #define ATOMIC_CHAR_LOCK_FREE __GCC_ATOMIC_CHAR_LOCK_FREE +#ifdef __GCC_ATOMIC_CHAR8_T_LOCK_FREE +#define ATOMIC_CHAR8_T_LOCK_FREE __GCC_ATOMIC_CHAR8_T_LOCK_FREE +#endif #define ATOMIC_CHAR16_T_LOCK_FREE __GCC_ATOMIC_CHAR16_T_LOCK_FREE #define ATOMIC_CHAR32_T_LOCK_FREE __GCC_ATOMIC_CHAR32_T_LOCK_FREE #define ATOMIC_WCHAR_T_LOCK_FREE __GCC_ATOMIC_WCHAR_T_LOCK_FREE diff --git a/gcc/testsuite/gcc.dg/atomic/c2x-stdatomic-lockfree-char8_t.c b/gcc/testsuite/gcc.dg/atomic/c2x-stdatomic-lockfree-char8_t.c new file mode 100644 index 0000000000000000000000000000000000000000..1b692f55ed0fa47651ab833891245fe359ab94c0 --- /dev/null +++ b/gcc/testsuite/gcc.dg/atomic/c2x-stdatomic-lockfree-char8_t.c @@ -0,0 +1,42 @@ +/* Test atomic_is_lock_free for char8_t. */ +/* { dg-do run } */ +/* { dg-options "-std=c2x -pedantic-errors" } */ + +#include <stdatomic.h> +#include <stdint.h> + +extern void abort (void); + +_Atomic __CHAR8_TYPE__ ac8a; +atomic_char8_t ac8t; + +#define CHECK_TYPE(MACRO, V1, V2) \ + do \ + { \ + int r1 = MACRO; \ + int r2 = atomic_is_lock_free (&V1); \ + int r3 = atomic_is_lock_free (&V2); \ + if (r1 != 0 && r1 != 1 && r1 != 2) \ + abort (); \ + if (r2 != 0 && r2 != 1) \ + abort (); \ + if (r3 != 0 && r3 != 1) \ + abort (); \ + if (r1 == 2 && r2 != 1) \ + abort (); \ + if (r1 == 2 && r3 != 1) \ + abort (); \ + if (r1 == 0 && r2 != 0) \ + abort (); \ + if (r1 == 0 && r3 != 0) \ + abort (); \ + } \ + while (0) + +int +main () +{ + CHECK_TYPE (ATOMIC_CHAR8_T_LOCK_FREE, ac8a, ac8t); + + return 0; +} diff --git a/gcc/testsuite/gcc.dg/atomic/gnu2x-stdatomic-lockfree-char8_t.c b/gcc/testsuite/gcc.dg/atomic/gnu2x-stdatomic-lockfree-char8_t.c new file mode 100644 index 0000000000000000000000000000000000000000..27a3cfe355281309a6c056f6196e6a8d5150085f --- /dev/null +++ b/gcc/testsuite/gcc.dg/atomic/gnu2x-stdatomic-lockfree-char8_t.c @@ -0,0 +1,5 @@ +/* Test atomic_is_lock_free for char8_t with -std=gnu2x. */ +/* { dg-do run } */ +/* { dg-options "-std=gnu2x -pedantic-errors" } */ + +#include "c2x-stdatomic-lockfree-char8_t.c" diff --git a/gcc/testsuite/gcc.dg/c11-utf8str-type.c b/gcc/testsuite/gcc.dg/c11-utf8str-type.c new file mode 100644 index 0000000000000000000000000000000000000000..8be9abb9686d496d0e5d0a2f27789175820abc79 --- /dev/null +++ b/gcc/testsuite/gcc.dg/c11-utf8str-type.c @@ -0,0 +1,6 @@ +/* Test C11 UTF-8 string literal type. */ +/* { dg-do compile } */ +/* { dg-options "-std=c11" } */ + +_Static_assert (_Generic (u8"text", char*: 1, default: 2) == 1, "UTF-8 string literals have an unexpected type"); +_Static_assert (_Generic (u8"x"[0], char: 1, default: 2) == 1, "UTF-8 string literal elements have an unexpected type"); diff --git a/gcc/testsuite/gcc.dg/c17-utf8str-type.c b/gcc/testsuite/gcc.dg/c17-utf8str-type.c new file mode 100644 index 0000000000000000000000000000000000000000..515c6db3970ea89dbc2b9848a2b9a0328d01e989 --- /dev/null +++ b/gcc/testsuite/gcc.dg/c17-utf8str-type.c @@ -0,0 +1,6 @@ +/* Test C17 UTF-8 string literal type. */ +/* { dg-do compile } */ +/* { dg-options "-std=c17" } */ + +_Static_assert (_Generic (u8"text", char*: 1, default: 2) == 1, "UTF-8 string literals have an unexpected type"); +_Static_assert (_Generic (u8"x"[0], char: 1, default: 2) == 1, "UTF-8 string literal elements have an unexpected type"); diff --git a/gcc/testsuite/gcc.dg/c2x-utf8str-type.c b/gcc/testsuite/gcc.dg/c2x-utf8str-type.c new file mode 100644 index 0000000000000000000000000000000000000000..ebdde97b57a3c92e03e26e39eef90b172acd4b2b --- /dev/null +++ b/gcc/testsuite/gcc.dg/c2x-utf8str-type.c @@ -0,0 +1,6 @@ +/* Test C2X UTF-8 string literal type. */ +/* { dg-do compile } */ +/* { dg-options "-std=c2x" } */ + +_Static_assert (_Generic (u8"text", unsigned char*: 1, default: 2) == 1, "UTF-8 string literals have an unexpected type"); +_Static_assert (_Generic (u8"x"[0], unsigned char: 1, default: 2) == 1, "UTF-8 string literal elements have an unexpected type"); diff --git a/gcc/testsuite/gcc.dg/c2x-utf8str.c b/gcc/testsuite/gcc.dg/c2x-utf8str.c new file mode 100644 index 0000000000000000000000000000000000000000..2e4c392da9f7650c1261a4c7af0a5a930cab587b --- /dev/null +++ b/gcc/testsuite/gcc.dg/c2x-utf8str.c @@ -0,0 +1,34 @@ +/* Test initialization by UTF-8 string literal in C2X. */ +/* { dg-do compile } */ +/* { dg-require-effective-target wchar } */ +/* { dg-options "-std=c2x" } */ + +typedef __CHAR8_TYPE__ char8_t; +typedef __CHAR16_TYPE__ char16_t; +typedef __CHAR32_TYPE__ char32_t; +typedef __WCHAR_TYPE__ wchar_t; + +/* Test that char, signed char, unsigned char, and char8_t arrays can be + initialized by a UTF-8 string literal. */ +const char cbuf1[] = u8"text"; +const char cbuf2[] = { u8"text" }; +const signed char scbuf1[] = u8"text"; +const signed char scbuf2[] = { u8"text" }; +const unsigned char ucbuf1[] = u8"text"; +const unsigned char ucbuf2[] = { u8"text" }; +const char8_t c8buf1[] = u8"text"; +const char8_t c8buf2[] = { u8"text" }; + +/* Test that a diagnostic is issued for attempted initialization of + other character types by a UTF-8 string literal. */ +const char16_t c16buf1[] = u8"text"; /* { dg-error "from a string literal with type array of .unsigned char." } */ +const char16_t c16buf2[] = { u8"text" }; /* { dg-error "from a string literal with type array of .unsigned char." } */ +const char32_t c32buf1[] = u8"text"; /* { dg-error "from a string literal with type array of .unsigned char." } */ +const char32_t c32buf2[] = { u8"text" }; /* { dg-error "from a string literal with type array of .unsigned char." } */ +const wchar_t wbuf1[] = u8"text"; /* { dg-error "from a string literal with type array of .unsigned char." } */ +const wchar_t wbuf2[] = { u8"text" }; /* { dg-error "from a string literal with type array of .unsigned char." } */ + +/* Test that char8_t arrays can be initialized by an ordinary string + literal. */ +const char8_t c8buf3[] = "text"; +const char8_t c8buf4[] = { "text" }; diff --git a/gcc/testsuite/gcc.dg/gnu2x-utf8str-type.c b/gcc/testsuite/gcc.dg/gnu2x-utf8str-type.c new file mode 100644 index 0000000000000000000000000000000000000000..efe16ffc28d50c1bd4f2158f61b24d7e4b68e5e2 --- /dev/null +++ b/gcc/testsuite/gcc.dg/gnu2x-utf8str-type.c @@ -0,0 +1,5 @@ +/* Test C2X UTF-8 string literal type with -std=gnu2x. */ +/* { dg-do compile } */ +/* { dg-options "-std=gnu2x" } */ + +#include "c2x-utf8str-type.c" diff --git a/gcc/testsuite/gcc.dg/gnu2x-utf8str.c b/gcc/testsuite/gcc.dg/gnu2x-utf8str.c new file mode 100644 index 0000000000000000000000000000000000000000..f3719ea8c774c62806dba425187c928fddf3256b --- /dev/null +++ b/gcc/testsuite/gcc.dg/gnu2x-utf8str.c @@ -0,0 +1,34 @@ +/* Test initialization by UTF-8 string literal in C2X with -std=gnu2x. */ +/* { dg-do compile } */ +/* { dg-require-effective-target wchar } */ +/* { dg-options "-std=gnu2x" } */ + +typedef __CHAR8_TYPE__ char8_t; +typedef __CHAR16_TYPE__ char16_t; +typedef __CHAR32_TYPE__ char32_t; +typedef __WCHAR_TYPE__ wchar_t; + +/* Test that char, signed char, unsigned char, and char8_t arrays can be + initialized by a UTF-8 string literal. */ +const char cbuf1[] = u8"text"; +const char cbuf2[] = { u8"text" }; +const signed char scbuf1[] = u8"text"; +const signed char scbuf2[] = { u8"text" }; +const unsigned char ucbuf1[] = u8"text"; +const unsigned char ucbuf2[] = { u8"text" }; +const char8_t c8buf1[] = u8"text"; +const char8_t c8buf2[] = { u8"text" }; + +/* Test that a diagnostic is issued for attempted initialization of + other character types by a UTF-8 string literal. */ +const char16_t c16buf1[] = u8"text"; /* { dg-error "from a string literal with type array of .unsigned char." } */ +const char16_t c16buf2[] = { u8"text" }; /* { dg-error "from a string literal with type array of .unsigned char." } */ +const char32_t c32buf1[] = u8"text"; /* { dg-error "from a string literal with type array of .unsigned char." } */ +const char32_t c32buf2[] = { u8"text" }; /* { dg-error "from a string literal with type array of .unsigned char." } */ +const wchar_t wbuf1[] = u8"text"; /* { dg-error "from a string literal with type array of .unsigned char." } */ +const wchar_t wbuf2[] = { u8"text" }; /* { dg-error "from a string literal with type array of .unsigned char." } */ + +/* Test that char8_t arrays can be initialized by an ordinary string + literal. */ +const char8_t c8buf3[] = "text"; +const char8_t c8buf4[] = { "text" };