From 0669295b1e7991bb5465267d4bd06576883e648b Mon Sep 17 00:00:00 2001
From: Andrew MacLeod <amacleod@redhat.com>
Date: Mon, 7 Nov 2011 20:06:39 +0000
Subject: [PATCH] atomic_base.h (atomic_thread_fence): Call builtin.

2011-11-07  Andrew MacLeod  <amacleod@redhat.com>

	libstdc++-v3
	* include/bits/atomic_base.h (atomic_thread_fence): Call builtin.
	(atomic_signal_fence): Call builtin.
	(atomic_flag::test_and_set): Call __atomic_exchange when it is lockfree,
	otherwise fall back to call __sync_lock_test_and_set.
	(atomic_flag::clear): Call __atomic_store when it is lockfree,
	otherwise fall back to call __sync_lock_release.

	gcc
	* doc/extend.texi: Document behaviour change for __atomic_exchange and
	__atomic_store.
	* optabs.c (expand_atomic_exchange): Expand to __sync_lock_test_and_set
	only when originated from that builtin.
	(expand_atomic_store): Expand to __sync_lock_release when originated
	from that builtin.
	* builtins.c (expand_builtin_sync_lock_test_and_set): Add flag that
	expand_atomic_exchange call originated from here.
	(expand_builtin_sync_lock_release): Add flag that expand_atomic_store
	call originated from here.
	(expand_builtin_atomic_exchange): Add origination flag.
	(expand_builtin_atomic_store): Add origination flag.
	* expr.h (expand_atomic_exchange, expand_atomic_store): Add boolean
	parameters to indicate implementation fall back options.

From-SVN: r181111
---
 gcc/ChangeLog                           | 17 ++++++
 gcc/builtins.c                          |  8 +--
 gcc/doc/extend.texi                     |  8 +--
 gcc/expr.h                              |  4 +-
 gcc/optabs.c                            | 78 +++++++++++++++++--------
 libstdc++-v3/ChangeLog                  |  9 +++
 libstdc++-v3/include/bits/atomic_base.h | 60 +++++++++++++++++--
 7 files changed, 141 insertions(+), 43 deletions(-)

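As background for the behaviour change described in the ChangeLog above: the
__sync_lock_* builtins only promise acquire/release semantics and, on some
targets, can only store the values 1 and 0, whereas the __atomic_* builtins
must accept any value and any memory model.  A minimal sketch of the two
families as seen from user code follows; the variable and function names and
the stored values are illustrative only, not taken from the patch.

  int lock;            /* 0 = free, 1 = held */
  int shared_value;

  void
  user_level_view (void)
  {
    /* The __sync forms: acquire on test_and_set, release on lock_release;
       some targets can only store 1 here (and 0 below).  */
    int old = __sync_lock_test_and_set (&lock, 1);
    __sync_lock_release (&lock);

    /* The __atomic forms must honour any value and memory model, so after
       this patch they are expanded via the sync_lock patterns only when
       the request really originated from the __sync builtins above.  */
    int prev = __atomic_exchange_n (&shared_value, 42, __ATOMIC_SEQ_CST);
    __atomic_store_n (&shared_value, 17, __ATOMIC_RELEASE);
    (void) old; (void) prev;
  }
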
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index bce55bc4d722..de26cb51dcea 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,20 @@
+2011-11-07  Andrew MacLeod  <amacleod@redhat.com>
+
+	* doc/extend.texi: Document behaviour change for __atomic_exchange and
+	__atomic_store.
+	* optabs.c (expand_atomic_exchange): Expand to __sync_lock_test_and_set
+	only when originated from that builtin.
+	(expand_atomic_store): Expand to __sync_lock_release when originated
+	from that builtin.
+	* builtins.c (expand_builtin_sync_lock_test_and_set): Add flag that
+	expand_atomic_exchange call originated from here.
+	(expand_builtin_sync_lock_release): Add flag that expand_atomic_store
+	call originated from here.
+	(expand_builtin_atomic_exchange): Add origination flag.
+	(expand_builtin_atomic_store): Add origination flag.
+	* expr.h (expand_atomic_exchange, expand_atomic_store): Add boolean 
+	parameters to indicate implementation fall back options.
+
 2011-11-07  Georg-Johann Lay  <avr@gjlay.de>
 
 	* config/avr/avr.c (output_reload_in_const): Can handle CONSTANT_P
diff --git a/gcc/builtins.c b/gcc/builtins.c
index dc9fe78599ce..205d586fc332 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -5221,7 +5221,7 @@ expand_builtin_sync_lock_test_and_set (enum machine_mode mode, tree exp,
   mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
   val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode);
 
-  return expand_atomic_exchange (target, mem, val, MEMMODEL_ACQUIRE);
+  return expand_atomic_exchange (target, mem, val, MEMMODEL_ACQUIRE, true);
 }
 
 /* Expand the __sync_lock_release intrinsic.  EXP is the CALL_EXPR.  */
@@ -5234,7 +5234,7 @@ expand_builtin_sync_lock_release (enum machine_mode mode, tree exp)
   /* Expand the operands.  */
   mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
 
-  expand_atomic_store (mem, const0_rtx, MEMMODEL_RELEASE);
+  expand_atomic_store (mem, const0_rtx, MEMMODEL_RELEASE, true);
 }
 
 /* Given an integer representing an ``enum memmodel'', verify its
@@ -5285,7 +5285,7 @@ expand_builtin_atomic_exchange (enum machine_mode mode, tree exp, rtx target)
   mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
   val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode);
 
-  return expand_atomic_exchange (target, mem, val, model);
+  return expand_atomic_exchange (target, mem, val, model, false);
 }
 
 /* Expand the __atomic_compare_exchange intrinsic:
@@ -5402,7 +5402,7 @@ expand_builtin_atomic_store (enum machine_mode mode, tree exp)
   mem = get_builtin_sync_mem (CALL_EXPR_ARG (exp, 0), mode);
   val = expand_expr_force_mode (CALL_EXPR_ARG (exp, 1), mode);
 
-  return expand_atomic_store (mem, val, model);
+  return expand_atomic_store (mem, val, model, false);
 }
 
 /* Expand the __atomic_fetch_XXX intrinsic:
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 91e4e328f92a..c7e8ede9cb06 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -6910,9 +6910,7 @@ contents of @code{*@var{ptr}} in @code{*@var{ret}}.
 
 @deftypefn {Built-in Function} void __atomic_store_n (@var{type} *ptr, @var{type} val, int memmodel)
 This built-in function implements an atomic store operation.  It writes 
-@code{@var{val}} into @code{*@var{ptr}}.  On targets which are limited,
-0 may be the only valid value. This mimics the behaviour of
-@code{__sync_lock_release} on such hardware.
+@code{@var{val}} into @code{*@var{ptr}}.  
 
 The valid memory model variants are
 @code{__ATOMIC_RELAXED}, @code{__ATOMIC_SEQ_CST}, and @code{__ATOMIC_RELEASE}.
@@ -6930,10 +6928,6 @@ This built-in function implements an atomic exchange operation.  It writes
 @var{val} into @code{*@var{ptr}}, and returns the previous contents of
 @code{*@var{ptr}}.
 
-On targets which are limited, a value of 1 may be the only valid value
-written.  This mimics the behaviour of @code{__sync_lock_test_and_set} on
-such hardware.
-
 The valid memory model variants are
 @code{__ATOMIC_RELAXED}, @code{__ATOMIC_SEQ_CST}, @code{__ATOMIC_ACQUIRE},
 @code{__ATOMIC_RELEASE}, and @code{__ATOMIC_ACQ_REL}.
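For reference, a short usage sketch consistent with the documentation text
above; the names counter, store_and_swap and next are illustrative and not
part of the patch.

  long counter;

  void
  store_and_swap (long next)
  {
    /* __atomic_store_n accepts only the relaxed, release and seq_cst models.  */
    __atomic_store_n (&counter, next, __ATOMIC_RELEASE);

    /* __atomic_exchange_n additionally allows acquire and acq_rel, and
       returns the previous contents of the location.  */
    long previous = __atomic_exchange_n (&counter, 0, __ATOMIC_ACQ_REL);
    (void) previous;
  }
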
diff --git a/gcc/expr.h b/gcc/expr.h
index 1623ad97a730..2cc8152c7403 100644
--- a/gcc/expr.h
+++ b/gcc/expr.h
@@ -215,9 +215,9 @@ rtx emit_conditional_add (rtx, enum rtx_code, rtx, rtx, enum machine_mode,
 rtx expand_sync_operation (rtx, rtx, enum rtx_code);
 rtx expand_sync_fetch_operation (rtx, rtx, enum rtx_code, bool, rtx);
 
-rtx expand_atomic_exchange (rtx, rtx, rtx, enum memmodel);
+rtx expand_atomic_exchange (rtx, rtx, rtx, enum memmodel, bool);
 rtx expand_atomic_load (rtx, rtx, enum memmodel);
-rtx expand_atomic_store (rtx, rtx, enum memmodel);
+rtx expand_atomic_store (rtx, rtx, enum memmodel, bool);
 rtx expand_atomic_fetch_op (rtx, rtx, rtx, enum rtx_code, enum memmodel, 
 			      bool);
 void expand_atomic_thread_fence (enum memmodel);
diff --git a/gcc/optabs.c b/gcc/optabs.c
index 163a44972ab2..7901b95f6321 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -7256,10 +7256,13 @@ expand_compare_and_swap_loop (rtx mem, rtx old_reg, rtx new_reg, rtx seq)
    atomically store VAL in MEM and return the previous value in MEM.
 
    MEMMODEL is the memory model variant to use.
-   TARGET is an option place to stick the return value.  */
+   TARGET is an optional place to stick the return value.  
+   USE_TEST_AND_SET indicates whether __sync_lock_test_and_set should be used
+   as a fall back if the atomic_exchange pattern does not exist.  */
 
 rtx
-expand_atomic_exchange (rtx target, rtx mem, rtx val, enum memmodel model)
+expand_atomic_exchange (rtx target, rtx mem, rtx val, enum memmodel model,
+			bool use_test_and_set)			
 {
   enum machine_mode mode = GET_MODE (mem);
   enum insn_code icode;
@@ -7284,31 +7287,39 @@ expand_atomic_exchange (rtx target, rtx mem, rtx val, enum memmodel model)
      acquire barrier.  If the pattern exists, and the memory model is stronger
      than acquire, add a release barrier before the instruction.
      The barrier is not needed if sync_lock_test_and_set doesn't exist since
-     it will expand into a compare-and-swap loop.  */
+     it will expand into a compare-and-swap loop.
 
-  icode = direct_optab_handler (sync_lock_test_and_set_optab, mode);
-  last_insn = get_last_insn ();
-  if ((icode != CODE_FOR_nothing) && (model == MEMMODEL_SEQ_CST || 
-				      model == MEMMODEL_RELEASE ||
-				      model == MEMMODEL_ACQ_REL))
-    expand_builtin_mem_thread_fence (model);
+     Some targets have non-compliant test_and_sets, so it would be incorrect
+     to emit a test_and_set in place of an __atomic_exchange.  The test_and_set
+     builtin shares this expander since exchange can always replace the
+     test_and_set.  */
 
-  if (icode != CODE_FOR_nothing)
+  if (use_test_and_set)
     {
-      struct expand_operand ops[3];
+      icode = direct_optab_handler (sync_lock_test_and_set_optab, mode);
+      last_insn = get_last_insn ();
+      if ((icode != CODE_FOR_nothing) && (model == MEMMODEL_SEQ_CST || 
+					  model == MEMMODEL_RELEASE ||
+					  model == MEMMODEL_ACQ_REL))
+	expand_builtin_mem_thread_fence (model);
 
-      create_output_operand (&ops[0], target, mode);
-      create_fixed_operand (&ops[1], mem);
-      /* VAL may have been promoted to a wider mode.  Shrink it if so.  */
-      create_convert_operand_to (&ops[2], val, mode, true);
-      if (maybe_expand_insn (icode, 3, ops))
-	return ops[0].value;
-    }
+      if (icode != CODE_FOR_nothing)
+	{
+	  struct expand_operand ops[3];
+
+	  create_output_operand (&ops[0], target, mode);
+	  create_fixed_operand (&ops[1], mem);
+	  /* VAL may have been promoted to a wider mode.  Shrink it if so.  */
+	  create_convert_operand_to (&ops[2], val, mode, true);
+	  if (maybe_expand_insn (icode, 3, ops))
+	    return ops[0].value;
+	}
 
-  /* Remove any fence we may have inserted since a compare and swap loop is a
-     full memory barrier.  */
-  if (last_insn != get_last_insn ())
-    delete_insns_since (last_insn);
+      /* Remove any fence that was inserted since a compare and swap loop is
+	 already a full memory barrier.  */
+      if (last_insn != get_last_insn ())
+	delete_insns_since (last_insn);
+    }
 
   /* Otherwise, use a compare-and-swap loop for the exchange.  */
   if (can_compare_and_swap_p (mode))
@@ -7489,10 +7500,11 @@ expand_atomic_load (rtx target, rtx mem, enum memmodel model)
 /* This function expands the atomic store operation:
    Atomically store VAL in MEM.
    MEMMODEL is the memory model variant to use.
+   USE_RELEASE is true if __sync_lock_release can be used as a fall back.
    function returns const0_rtx if a pattern was emitted.  */
 
 rtx
-expand_atomic_store (rtx mem, rtx val, enum memmodel model)
+expand_atomic_store (rtx mem, rtx val, enum memmodel model, bool use_release)
 {
   enum machine_mode mode = GET_MODE (mem);
   enum insn_code icode;
@@ -7509,12 +7521,30 @@ expand_atomic_store (rtx mem, rtx val, enum memmodel model)
 	return const0_rtx;
     }
 
+  /* If using __sync_lock_release is a viable alternative, try it.  */
+  if (use_release)
+    {
+      icode = direct_optab_handler (sync_lock_release_optab, mode);
+      if (icode != CODE_FOR_nothing)
+	{
+	  create_fixed_operand (&ops[0], mem);
+	  create_input_operand (&ops[1], const0_rtx, mode);
+	  if (maybe_expand_insn (icode, 2, ops))
+	    {
+	      /* lock_release is only a release barrier.  */
+	      if (model == MEMMODEL_SEQ_CST)
+		expand_builtin_mem_thread_fence (model);
+	      return const0_rtx;
+	    }
+	}
+    }
+
   /* If the size of the object is greater than word size on this target,
      a default store will not be atomic, Try a mem_exchange and throw away
      the result.  If that doesn't work, don't do anything.  */
   if (GET_MODE_PRECISION(mode) > BITS_PER_WORD)
     {
-      rtx target = expand_atomic_exchange (NULL_RTX, mem, val, model);
+      rtx target = expand_atomic_exchange (NULL_RTX, mem, val, model, false);
       if (target)
         return const0_rtx;
       else
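At the source level, the new use_release path (and the matching
atomic_flag::clear fallback later in this patch) amounts to the following
sketch; my_lock is an illustrative name, and the compiler emits this at the
RTL level rather than as source.

  char my_lock;

  void
  release_seq_cst (void)
  {
    /* sync_lock_release is only a release barrier, so a stronger memory
       model needs an explicit fence after it.  */
    __sync_lock_release (&my_lock);
    __atomic_thread_fence (__ATOMIC_SEQ_CST);
  }
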
diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index abcee817a4bb..f28bc5c4b420 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,3 +1,12 @@
+2011-11-07  Andrew MacLeod  <amacleod@redhat.com>
+
+	* include/bits/atomic_base.h (atomic_thread_fence): Call builtin.
+	(atomic_signal_fence): Call builtin.
+	(atomic_flag::test_and_set): Call __atomic_exchange when it is lockfree,
+	otherwise fall back to call __sync_lock_test_and_set.
+	(atomic_flag::clear): Call __atomic_store when it is lockfree,
+	otherwise fall back to call __sync_lock_release.
+
 2011-11-07  Rainer Orth  <ro@CeBiTec.Uni-Bielefeld.DE>
 
 	PR bootstrap/50982
diff --git a/libstdc++-v3/include/bits/atomic_base.h b/libstdc++-v3/include/bits/atomic_base.h
index 2e4a6a1894a9..e297eb0e6ada 100644
--- a/libstdc++-v3/include/bits/atomic_base.h
+++ b/libstdc++-v3/include/bits/atomic_base.h
@@ -69,10 +69,16 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   }
 
   void
-  atomic_thread_fence(memory_order) noexcept;
+  atomic_thread_fence(memory_order __m) noexcept
+  {
+    __atomic_thread_fence (__m);
+  }
 
   void
-  atomic_signal_fence(memory_order) noexcept;
+  atomic_signal_fence(memory_order __m) noexcept
+  {
+    __atomic_signal_fence (__m);
+  }
 
   /// kill_dependency
   template<typename _Tp>
@@ -261,13 +267,35 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     bool
     test_and_set(memory_order __m = memory_order_seq_cst) noexcept
     {
-      return __atomic_exchange_n(&_M_i, 1, __m);
+      /* The standard *requires* this to be lock free.  If exchange is not
+	 always lock free, then resort to the old test_and_set.  */
+      if (__atomic_always_lock_free (sizeof (_M_i), 0))
+	return __atomic_exchange_n(&_M_i, 1, __m);
+      else
+        {
+	  /* Sync test and set is only guaranteed to be acquire.  */
+	  if (__m == memory_order_seq_cst || __m == memory_order_release
+	      || __m == memory_order_acq_rel)
+	    atomic_thread_fence (__m);
+	  return __sync_lock_test_and_set (&_M_i, 1);
+	}
     }
 
     bool
     test_and_set(memory_order __m = memory_order_seq_cst) volatile noexcept
     {
-      return __atomic_exchange_n(&_M_i, 1, __m);
+      /* The standard *requires* this to be lock free.  If exchange is not
+	 always lock free, then resort to the old test_and_set.  */
+      if (__atomic_always_lock_free (sizeof (_M_i), 0))
+	return __atomic_exchange_n(&_M_i, 1, __m);
+      else
+        {
+	  /* Sync test and set is only guaranteed to be acquire.  */
+	  if (__m == memory_order_seq_cst || __m == memory_order_release
+	      || __m == memory_order_acq_rel)
+	    atomic_thread_fence (__m);
+	  return __sync_lock_test_and_set (&_M_i, 1);
+	}
     }
 
     void
@@ -277,7 +305,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       __glibcxx_assert(__m != memory_order_acquire);
       __glibcxx_assert(__m != memory_order_acq_rel);
 
-      __atomic_store_n(&_M_i, 0, __m);
+      /* The standard *requires* this to be lock free.  If store is not always
+	 lock free, then resort to the old style __sync_lock_release.  */
+      if (__atomic_always_lock_free (sizeof (_M_i), 0))
+	__atomic_store_n(&_M_i, 0, __m);
+      else
+        {
+	  __sync_lock_release (&_M_i, 0);
+	  /* __sync_lock_release is only guaranteed to be a release barrier.  */
+	  if (__m == memory_order_seq_cst)
+	    atomic_thread_fence (__m);
+	}
     }
 
     void
@@ -287,7 +325,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       __glibcxx_assert(__m != memory_order_acquire);
       __glibcxx_assert(__m != memory_order_acq_rel);
 
-      __atomic_store_n(&_M_i, 0, __m);
+      /* The standard *requires* this to be lock free.  If store is not always
+	 lock free, then resort to the old style __sync_lock_release.  */
+      if (__atomic_always_lock_free (sizeof (_M_i), 0))
+	__atomic_store_n(&_M_i, 0, __m);
+      else
+        {
+	  __sync_lock_release (&_M_i, 0);
+	  /* __sync_lock_release is only guaranteed to be a release barrier.  */
+	  if (__m == memory_order_seq_cst)
+	    atomic_thread_fence (__m);
+	}
     }
   };
 
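The atomic_flag changes above keep the usual spin-lock idiom working whether
the lock-free __atomic path or the __sync fallback is taken.  A small usage
sketch follows; the names door and critical_section and the busy-wait loop
are illustrative, not part of the patch.

  #include <atomic>

  std::atomic_flag door = ATOMIC_FLAG_INIT;

  void
  critical_section ()
  {
    // The standard requires test_and_set to be lock free, which is why the
    // header falls back to the old __sync_lock_test_and_set rather than a
    // potentially locked __atomic_exchange.
    while (door.test_and_set (std::memory_order_acquire))
      ;  // spin until the flag was previously clear
    // ... protected work ...
    door.clear (std::memory_order_release);
  }
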
-- 
GitLab