From 30c99a9e194d987fce5271cacc16adf54f7c1bdf Mon Sep 17 00:00:00 2001
From: Jason Merrill <jason@redhat.com>
Date: Fri, 9 Oct 2009 20:39:46 -0400
Subject: [PATCH] * charset.c (_cpp_valid_ucn): Update C++0x restrictions.

From-SVN: r152614
---
 gcc/testsuite/ChangeLog          |  7 +++++++
 gcc/testsuite/g++.dg/cpp/ucn-1.C | 13 +++++++++++++
 libcpp/ChangeLog                 |  4 ++++
 libcpp/charset.c                 | 21 +++++++++++++++------
 4 files changed, 39 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp/ucn-1.C

diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 3875b1dbaf16..534d797e04de 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2009-10-09  Jason Merrill  <jason@redhat.com>
+
+	* g++.dg/cpp/ucn-1.C: New.
+
+2009-10-08  Jason Merrill  <jason@redhat.com>
+
 2009-10-09  Janus Weil  <janus@gcc.gnu.org>
 
 	PR fortran/41585
@@ -60,6 +66,7 @@
 
 2009-10-08  Jason Merrill  <jason@redhat.com>
 
+	PR c++/36816
 	* g++.dg/cpp0x/rv-deduce.C: New.
 
 	PR c++/37177
diff --git a/gcc/testsuite/g++.dg/cpp/ucn-1.C b/gcc/testsuite/g++.dg/cpp/ucn-1.C
new file mode 100644
index 000000000000..354e1d976b05
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp/ucn-1.C
@@ -0,0 +1,13 @@
+// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2007/n2170.html
+// { dg-options "-std=c++0x -fextended-identifiers" }
+// Check which universal character names (UCNs) are accepted in C++0x mode.
+int main()
+{
+  "\u0041";			// UCN for 'A' (a basic source character) is OK in a string literal
+  '\u0041';			// likewise OK in a character literal
+  // In an identifier, a UCN naming a basic source character is an error.
+  int c\u0041c;		      // { dg-error "not valid in an identifier" }
+  int c\u0024c;		      // UCN for '$' is OK; '$' is not part of the basic source char set
+  // A surrogate code point is rejected even inside a string literal.
+  U"\uD800";		  // { dg-error "not a valid universal character" }
+}
diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog
index 4672abed7501..9d9556ee0adc 100644
--- a/libcpp/ChangeLog
+++ b/libcpp/ChangeLog
@@ -1,3 +1,7 @@
+2009-10-09  Jason Merrill  <jason@redhat.com>
+
+	* charset.c (_cpp_valid_ucn): Update C++0x restrictions.
+
 2009-10-09  Neil Vachharajani <nvachhar@google.com>
 
 	* directives.c (DIRECTIVE_TABLE): Remove DEPRECATED from ident and
diff --git a/libcpp/charset.c b/libcpp/charset.c
index b96c646f58e8..bd24ec2490d1 100644
--- a/libcpp/charset.c
+++ b/libcpp/charset.c
@@ -948,10 +948,16 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c,
    ISO/IEC 10646 is NNNNNNNN; the character designated by the
    universal character name \uNNNN is that character whose character
    short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
-   for a universal character name is less than 0x20 or in the range
-   0x7F-0x9F (inclusive), or if the universal character name
-   designates a character in the basic source character set, then the
-   program is ill-formed.
+   for a universal character name corresponds to a surrogate code point
+   (in the range 0xD800-0xDFFF, inclusive), the program is ill-formed.
+   Additionally, if the hexadecimal value for a universal-character-name
+   outside a character or string literal corresponds to a control character
+   (in either of the ranges 0x00-0x1F or 0x7F-0x9F, both inclusive) or to a
+   character in the basic source character set, the program is ill-formed.
+
+   C99 6.4.3: A universal character name shall not specify a character
+   whose short identifier is less than 00A0 other than 0024 ($), 0040 (@),
+   or 0060 (`), nor one in the range D800 through DFFF inclusive.
 
    *PSTR must be preceded by "\u" or "\U"; it is assumed that the
    buffer end is delimited by a non-hex digit.  Returns zero if the
@@ -1018,9 +1024,12 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
 		 (int) (str - base), base);
       result = 1;
     }
-  /* The standard permits $, @ and ` to be specified as UCNs.  We use
-     hex escapes so that this also works with EBCDIC hosts.  */
+  /* The C99 standard permits $, @ and ` to be specified as UCNs.  We use
+     hex escapes so that this also works with EBCDIC hosts.  C++0x permits
+     everything below 0xa0 within literals, so the check is skipped for C++;
+     ucn_valid_in_identifier will complain about identifiers.  */
   else if ((result < 0xa0
+	    && !CPP_OPTION (pfile, cplusplus)
 	    && (result != 0x24 && result != 0x40 && result != 0x60))
 	   || (result & 0x80000000)
 	   || (result >= 0xD800 && result <= 0xDFFF))
-- 
GitLab