From 2c3fcba6dc56f848a07332ff005e2db9054dca26 Mon Sep 17 00:00:00 2001
From: Zack Weinberg <zack@gcc.gnu.org>
Date: Mon, 10 Sep 2001 22:34:03 +0000
Subject: [PATCH] cpplex.c (parse_identifier): Fast-path optimize.

	* cpplex.c (parse_identifier): Fast-path optimize.  Avoid
	copying identifier when we're just going to throw it away.
	(parse_identifier_slow): New routine to handle abnormal cases.
	(_cpp_lex_token): Update call site.

	* hashtable.c (ht_lookup): Don't assume that the string we've
	been given is NUL-terminated.
	* system.h: #define __builtin_expect(a, b) to (a) if not
	GCC >=3.0.

From-SVN: r45529
---
 gcc/ChangeLog   |  18 +++++--
 gcc/cpplex.c    | 128 +++++++++++++++++++++++++++++++++---------------
 gcc/hashtable.c |   2 +-
 gcc/system.h    |   7 +++
 4 files changed, 111 insertions(+), 44 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index b2edebd3533c..fed56b5f5d36 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,15 @@
+2001-09-10  Zack Weinberg  <zackw@panix.com>
+
+	* cpplex.c (parse_identifier): Fast-path optimize.  Avoid
+	copying identifier when we're just going to throw it away.
+	(parse_identifier_slow): New routine to handle abnormal cases.
+	(_cpp_lex_token): Update call site.
+
+	* hashtable.c (ht_lookup): Don't assume that the string we've
+	been given is NUL-terminated.
+	* system.h: #define __builtin_expect(a, b) to (a) if not
+	GCC >=3.0.
+
 2001-09-10  Michael Meissner  <meissner@redhat.com>
 
 	* config.gcc (sparc64-*-solaris2): Add alias to be compatible with
@@ -16,7 +28,7 @@ Mon Sep 10 16:26:44 2001  Richard Kenner  <kenner@vlsi1.ultra.nyu.edu>
 
 	* dwarf2out.c (incomplete_types, decl_scope_table): Make them
 	into varray's and register them as roots with the garbage
-	collector so they are not collected too soon.  
+	collector so they are not collected too soon.
 
 Mon Sep 10 14:21:26 CEST 2001  Jan Hubicka  <jh@suse.cz>
 
@@ -31,7 +43,7 @@ Mon Sep 10 14:21:26 CEST 2001  Jan Hubicka  <jh@suse.cz>
 	(basic_block_for_insn, label_value_list): Move from flow.c; make global.
 	(n_basic_blocks, n_edges, basic_block_info, entry_exit_blocks,
 	init_flow, clear_edges, can_delete_note_p, can_delete_label_p,
-	flow_delete_insn, flow_delete_insn_chain, create_basic_block, 
+	flow_delete_insn, flow_delete_insn_chain, create_basic_block,
 	expunge_block, flow_delete_block, compute_bb_for_insn,
 	update_bb_for_insn, set_block_for_insn, set_block_for_new_insns,
 	make_edge, remove_edge, redirect_edge_succ, redirect_edge_succ_nodup,
@@ -40,7 +52,7 @@ Mon Sep 10 14:21:26 CEST 2001  Jan Hubicka  <jh@suse.cz>
 	redirect_edge_and_branch, redirect_edge_and_branch_force,
 	tidy_fallthru_edge, tidy_fallthru_edges, back_edge_of_syntactic_loop_p,
 	split_edge, insert_insn_on_edge, commit_one_edge_insertion,
-	commit_edge_insertions, dump_flow_info, debug_flow_info, 
+	commit_edge_insertions, dump_flow_info, debug_flow_info,
 	dump_edge_info, dump_bb, debug_bb, debug_bb_n, print_rtl_with_bb,
 	verify_flow_info, purge_dead_edges, purge_all_dead_edges):
 	Move here from flow.c
diff --git a/gcc/cpplex.c b/gcc/cpplex.c
index 03bd85516cc4..071cdca0c4dc 100644
--- a/gcc/cpplex.c
+++ b/gcc/cpplex.c
@@ -88,7 +88,9 @@ static int skip_block_comment PARAMS ((cpp_reader *));
 static int skip_line_comment PARAMS ((cpp_reader *));
 static void adjust_column PARAMS ((cpp_reader *));
 static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
-static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
+static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *));
+static cpp_hashnode *parse_identifier_slow PARAMS ((cpp_reader *,
+						    const U_CHAR *));
 static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
 static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
 static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
@@ -470,40 +472,101 @@ name_p (pfile, string)
   return 1;  
 }
 
-/* Parse an identifier, skipping embedded backslash-newlines.
-   Calculate the hash value of the token while parsing, for improved
-   performance.  The hashing algorithm *must* match cpp_lookup().  */
+/* Parse an identifier, skipping embedded backslash-newlines.  This is
+   a critical inner loop.  The common case is an identifier which has
+   not been split by backslash-newline, does not contain a dollar
+   sign, and has already been scanned (roughly 10:1 ratio of
+   seen:unseen identifiers in normal code; the distribution is
+   Poisson-like).  The second most common case is a new identifier,
+   not split and with no dollar sign.  The other possibilities are
+   rare and have been relegated to parse_identifier_slow.  */
 
 static cpp_hashnode *
-parse_identifier (pfile, c)
+parse_identifier (pfile)
      cpp_reader *pfile;
-     cppchar_t c;
 {
   cpp_hashnode *result;
+  const U_CHAR *cur, *rlimit;
+
+  /* Fast-path loop.  Skim over a normal identifier.
+     N.B. ISIDNUM does not include $.  */
+  cur    = pfile->buffer->cur - 1;
+  rlimit = pfile->buffer->rlimit;
+  do
+    cur++;
+  while (cur < rlimit && ISIDNUM (*cur));
+
+  /* Check for slow-path cases.  */
+  if (cur < rlimit && (*cur == '?' || *cur == '\\' || *cur == '$'))
+    result = parse_identifier_slow (pfile, cur);
+  else
+    {
+      const U_CHAR *base = pfile->buffer->cur - 1;
+      result = (cpp_hashnode *)
+	ht_lookup (pfile->hash_table, base, cur - base, HT_ALLOC);
+      pfile->buffer->cur = cur;
+    }
+
+  /* Rarely, identifiers require diagnostics when lexed.
+     XXX Has to be forced out of the fast path.  */
+  if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
+			&& !pfile->state.skipping, 0))
+    {
+      /* It is allowed to poison the same identifier twice.  */
+      if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
+	cpp_error (pfile, "attempt to use poisoned \"%s\"",
+		   NODE_NAME (result));
+
+      /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
+	 replacement list of a variadic macro.  */
+      if (result == pfile->spec_nodes.n__VA_ARGS__
+	  && !pfile->state.va_args_ok)
+	cpp_pedwarn (pfile,
+	"__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
+    }
+
+  return result;
+}
+
+/* Slow path.  This handles identifiers which have been split, and
+   identifiers which contain dollar signs.  The part of the identifier
+   from PFILE->buffer->cur-1 up to (not including) CUR is already scanned.  */
+static cpp_hashnode *
+parse_identifier_slow (pfile, cur)
+     cpp_reader *pfile;
+     const U_CHAR *cur;
+{
   cpp_buffer *buffer = pfile->buffer;
-  unsigned int saw_dollar = 0, len;
+  const U_CHAR *base = buffer->cur - 1;
   struct obstack *stack = &pfile->hash_table->stack;
+  unsigned int c, saw_dollar = 0, len;
+
+  /* Copy the part of the token which is known to be okay.  */
+  obstack_grow (stack, base, cur - base);
 
+  /* Now process the part which isn't.  We are looking at one of
+     '$', '\\', or '?' on entry to this loop.  */
+  c = *cur++;
+  buffer->cur = cur;
   do
     {
-      do
-	{
-	  obstack_1grow (stack, c);
+      while (is_idchar (c))
+        {
+          obstack_1grow (stack, c);
 
-	  if (c == '$')
-	    saw_dollar++;
+          if (c == '$')
+            saw_dollar++;
 
-	  c = EOF;
-	  if (buffer->cur == buffer->rlimit)
-	    break;
+          c = EOF;
+          if (buffer->cur == buffer->rlimit)
+            break;
 
-	  c = *buffer->cur++;
-	}
-      while (is_idchar (c));
+          c = *buffer->cur++;
+        }
 
       /* Potential escaped newline?  */
       if (c != '?' && c != '\\')
-	break;
+        break;
       c = skip_escaped_newlines (pfile, c);
     }
   while (is_idchar (c));
@@ -521,26 +584,8 @@ parse_identifier (pfile, c)
   len = obstack_object_size (stack);
   obstack_1grow (stack, '\0');
 
-  /* This routine commits the memory if necessary.  */
-  result = (cpp_hashnode *)
+  return (cpp_hashnode *)
     ht_lookup (pfile->hash_table, obstack_finish (stack), len, HT_ALLOCED);
-
-  /* Some identifiers require diagnostics when lexed.  */
-  if (result->flags & NODE_DIAGNOSTIC && !pfile->state.skipping)
-    {
-      /* It is allowed to poison the same identifier twice.  */
-      if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
-	cpp_error (pfile, "attempt to use poisoned \"%s\"",
-		   NODE_NAME (result));
-
-      /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
-	 replacement list of a variadic macro.  */
-      if (result == pfile->spec_nodes.n__VA_ARGS__
-	  && !pfile->state.va_args_ok)
-	cpp_pedwarn (pfile, "__VA_ARGS__ can only appear in the expansion of a C99 variadic macro");
-    }
-
-  return result;
 }
 
 /* Parse a number, skipping embedded backslash-newlines.  */
@@ -1003,14 +1048,17 @@ _cpp_lex_token (pfile, result)
     case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
     case 'Y': case 'Z':
       result->type = CPP_NAME;
-      result->val.node = parse_identifier (pfile, c);
+      result->val.node = parse_identifier (pfile);
 
       /* 'L' may introduce wide characters or strings.  */
       if (result->val.node == pfile->spec_nodes.n_L)
 	{
-	  c = buffer->read_ahead; /* For make_string.  */
+	  c = buffer->read_ahead;
+	  if (c == EOF && buffer->cur < buffer->rlimit)
+	    c = *buffer->cur;
 	  if (c == '\'' || c == '"')
 	    {
+	      buffer->cur++;
 	      ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
 	      goto make_string;
 	    }
diff --git a/gcc/hashtable.c b/gcc/hashtable.c
index 7d0359a2c613..bd2b13727bb8 100644
--- a/gcc/hashtable.c
+++ b/gcc/hashtable.c
@@ -162,7 +162,7 @@ ht_lookup (table, str, len, insert)
 
   HT_LEN (node) = len;
   if (insert == HT_ALLOC)
-    HT_STR (node) = obstack_copy (&table->stack, str, len + 1);
+    HT_STR (node) = obstack_copy0 (&table->stack, str, len);
   else
     HT_STR (node) = str;
 
diff --git a/gcc/system.h b/gcc/system.h
index 6b580830e486..7938fb6bdf29 100644
--- a/gcc/system.h
+++ b/gcc/system.h
@@ -490,6 +490,13 @@ extern void abort PARAMS ((void));
 #endif /* ! __FUNCTION__ */
 #endif
 
+/* __builtin_expect(A, B) evaluates to A, but notifies the compiler that
+   the most likely value of A is B.  This feature was added at some point
+   between 2.95 and 3.0.  Let's use 3.0 as the lower bound for now.  */
+#if (GCC_VERSION < 3000)
+#define __builtin_expect(a, b) (a)
+#endif
+
 /* Provide some sort of boolean type.  We use stdbool.h if it's
   available.  This must be after all inclusion of system headers,
   as some of them will mess us up.  */
-- 
GitLab