From 07b6642b935e1cd499a2aae071db06d845e39a8b Mon Sep 17 00:00:00 2001
From: Torvald Riegel <triegel@redhat.com>
Date: Fri, 13 Jan 2012 23:45:06 +0000
Subject: [PATCH] libitm: Filter out undo writes that overlap with the libitm
 stack.

	PR libitm/51855
	* config/generic/tls.h (GTM::mask_stack_top): New.
	(GTM::mask_stack_bottom): Declare.
	* config/generic/tls.c (GTM::mask_stack_bottom): New.
	* local.cc (gtm_undolog::rollback): Filter out any updates that
	overlap the libitm stack.  Add current transaction as parameter.
	* libitm_i.h (GTM::gtm_undolog::rollback): Adapt.
	* beginend.cc (GTM::gtm_thread::rollback): Adapt.
	* testsuite/libitm.c/stackundo.c: New test.

From-SVN: r183172
---
 libitm/ChangeLog                      | 12 ++++++++++++
 libitm/beginend.cc                    |  2 +-
 libitm/config/generic/tls.cc          |  7 +++++++
 libitm/config/generic/tls.h           | 17 +++++++++++++++++
 libitm/libitm_i.h                     |  2 +-
 libitm/local.cc                       | 27 +++++++++++++++++++++++----
 libitm/testsuite/libitm.c/stackundo.c | 23 +++++++++++++++++++++++
 7 files changed, 84 insertions(+), 6 deletions(-)
 create mode 100644 libitm/testsuite/libitm.c/stackundo.c

diff --git a/libitm/ChangeLog b/libitm/ChangeLog
index fa12e8c0fa62..8efaef5f3297 100644
--- a/libitm/ChangeLog
+++ b/libitm/ChangeLog
@@ -1,3 +1,15 @@
+2012-01-14  Torvald Riegel  <triegel@redhat.com>
+
+	PR libitm/51855
+	* config/generic/tls.h (GTM::mask_stack_top): New.
+	(GTM::mask_stack_bottom): Declare.
+	* config/generic/tls.c (GTM::mask_stack_bottom): New.
+	* local.cc (gtm_undolog::rollback): Filter out any updates that
+	overlap the libitm stack.  Add current transaction as parameter.
+	* libitm_i.h (GTM::gtm_undolog::rollback): Adapt.
+	* beginend.cc (GTM::gtm_thread::rollback): Adapt.
+	* testsuite/libitm.c/stackundo.c: New test.
+
 2012-01-10  Richard Henderson  <rth@redhat.com>
 
 	* libitm_i.h (_Unwind_DeleteException): Declare weak.
diff --git a/libitm/beginend.cc b/libitm/beginend.cc
index fe14f32d110c..08c2174ea676 100644
--- a/libitm/beginend.cc
+++ b/libitm/beginend.cc
@@ -327,7 +327,7 @@ GTM::gtm_thread::rollback (gtm_transaction_cp *cp, bool aborting)
   // data. Because of the latter, we have to roll it back before any
   // dispatch-specific rollback (which handles synchronization with other
   // transactions).
-  undolog.rollback (cp ? cp->undolog_size : 0);
+  undolog.rollback (this, cp ? cp->undolog_size : 0);
 
   // Perform dispatch-specific rollback.
   abi_disp()->rollback (cp);
diff --git a/libitm/config/generic/tls.cc b/libitm/config/generic/tls.cc
index e502e50869b4..f07d082b4eef 100644
--- a/libitm/config/generic/tls.cc
+++ b/libitm/config/generic/tls.cc
@@ -30,4 +30,11 @@ namespace GTM HIDDEN {
 __thread gtm_thread_tls _gtm_thr_tls;
 #endif
 
+// See tls.h for comments.
+void * __attribute__((noinline))
+mask_stack_bottom(gtm_thread *tx)
+{
+  return (uint8_t*)__builtin_dwarf_cfa() - 256;
+}
+
 } // namespace GTM
diff --git a/libitm/config/generic/tls.h b/libitm/config/generic/tls.h
index 6bbdccf2be17..dfcd76a0dac9 100644
--- a/libitm/config/generic/tls.h
+++ b/libitm/config/generic/tls.h
@@ -60,6 +60,23 @@ static inline abi_dispatch * abi_disp() { return _gtm_thr_tls.disp; }
 static inline void set_abi_disp(abi_dispatch *x) { _gtm_thr_tls.disp = x; }
 #endif
 
+#ifndef HAVE_ARCH_GTM_MASK_STACK
+// To filter out any updates that overlap the libitm stack, we define
+// gtm_mask_stack_top to the entry point to the library and
+// gtm_mask_stack_bottom to below the calling function (enforced with the
+// noinline attribute).  This definition should be fine for all
+// stack-grows-down architectures.
+// FIXME We fake the bottom to be lower so that we are safe even if we might
+// call further functions (compared to where we called gtm_mask_stack_bottom
+// in the call hierarchy) to actually undo or redo writes (e.g., memcpy).
+// This is a completely arbitrary value; can we instead ensure that there are
+// no such calls, or can we determine a future-proof value otherwise?
+static inline void *
+mask_stack_top(gtm_thread *tx) { return tx->jb.cfa; }
+void * __attribute__((noinline))
+mask_stack_bottom(gtm_thread *tx);
+#endif
+
 } // namespace GTM
 
 #endif // LIBITM_TLS_H
diff --git a/libitm/libitm_i.h b/libitm/libitm_i.h
index c00389d51150..bf17086ef2eb 100644
--- a/libitm/libitm_i.h
+++ b/libitm/libitm_i.h
@@ -142,7 +142,7 @@ struct gtm_undolog
   size_t size() const { return undolog.size(); }
 
   // In local.cc
-  void rollback (size_t until_size = 0);
+  void rollback (gtm_thread* tx, size_t until_size = 0);
 };
 
 // Contains all thread-specific data required by the entire library.
diff --git a/libitm/local.cc b/libitm/local.cc
index 39b6da3a5d20..5645a12bab89 100644
--- a/libitm/local.cc
+++ b/libitm/local.cc
@@ -26,11 +26,20 @@
 
 namespace GTM HIDDEN {
 
-
-void
-gtm_undolog::rollback (size_t until_size)
+// This function needs to be noinline because we need to prevent that it gets
+// inlined into another function that calls further functions. This could
+// break our assumption that we only call memcpy and thus only need to
+// additionally protect the memcpy stack (see the hack in mask_stack_bottom()).
+// Even if that isn't an issue because those other calls don't happen during
+// copying, we still need mask_stack_bottom() to be called "close" to the
+// memcpy in terms of stack frames, so just ensure that for now using the
+// noinline.
+void __attribute__((noinline))
+gtm_undolog::rollback (gtm_thread* tx, size_t until_size)
 {
   size_t i, n = undolog.size();
+  void *top = mask_stack_top(tx);
+  void *bot = mask_stack_bottom(tx);
 
   if (n > 0)
     {
@@ -40,7 +49,17 @@ gtm_undolog::rollback (size_t until_size)
           size_t len = undolog[i];
           size_t words = (len + sizeof(gtm_word) - 1) / sizeof(gtm_word);
           i -= words;
-          __builtin_memcpy (ptr, &undolog[i], len);
+          // Filter out any updates that overlap the libitm stack.  We don't
+          // bother filtering out just the overlapping bytes because we don't
+          // merge writes and thus any overlapping write is either bogus or
+          // would restore data on stack frames that are not in use anymore.
+          // FIXME The memcpy can/will end up as another call but we
+          // calculated BOT based on the current function.  Can we inline or
+          // reimplement this without too much trouble due to unaligned calls
+          // and still have good performance, so that we can remove the hack
+          // in mask_stack_bottom()?
+          if (likely(ptr > top || (uint8_t*)ptr + len <= bot))
+            __builtin_memcpy (ptr, &undolog[i], len);
 	}
     }
 }
diff --git a/libitm/testsuite/libitm.c/stackundo.c b/libitm/testsuite/libitm.c/stackundo.c
new file mode 100644
index 000000000000..02759d77014b
--- /dev/null
+++ b/libitm/testsuite/libitm.c/stackundo.c
@@ -0,0 +1,23 @@
+int __attribute__((noinline)) test2(int x[1000])
+{
+  int i;
+  return x[12];
+}
+
+int __attribute__((noinline)) test1()
+{
+  int x[1000], i;
+
+  for (i = 0; i < 1000; i++)
+    x[i] = i;
+  return test2(x);
+}
+
+int main()
+{
+  __transaction_atomic {
+    if (test1() !=0)
+      __transaction_cancel;
+  }
+  return 0;
+}
-- 
GitLab