From 682b96647b7d12768d939caef860c5a180654b0a Mon Sep 17 00:00:00 2001
From: Paolo Carlini <pcarlini@suse.de>
Date: Sun, 10 Dec 2006 16:47:27 +0000
Subject: [PATCH] valarray-inst.cc (__gslice_to_index): Optimize performance.

2006-12-10  Paolo Carlini  <pcarlini@suse.de>

	* src/valarray-inst.cc (__gslice_to_index): Optimize performance.
	* testsuite/performance/26_numerics/valarray_gslice_to_index.cc: New.

From-SVN: r119707
---
 libstdc++-v3/ChangeLog                        |  5 ++
 libstdc++-v3/src/valarray-inst.cc             | 43 +++++++-------
 .../26_numerics/valarray_gslice_to_index.cc   | 57 +++++++++++++++++++
 3 files changed, 81 insertions(+), 24 deletions(-)
 create mode 100644 libstdc++-v3/testsuite/performance/26_numerics/valarray_gslice_to_index.cc

diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index a2c4a1dd0b94..a1f612946a1c 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,3 +1,8 @@
+2006-12-10  Paolo Carlini  <pcarlini@suse.de>
+
+	* src/valarray-inst.cc (__gslice_to_index): Optimize performance.
+	* testsuite/performance/26_numerics/valarray_gslice_to_index.cc: New.
+
 2006-12-08  Jakub Jelinek  <jakub@redhat.com>
 
 	* testsuite/util/testsuite_hooks.cc (set_memory_limits): Don't set
diff --git a/libstdc++-v3/src/valarray-inst.cc b/libstdc++-v3/src/valarray-inst.cc
index c13e1a2454bd..aa8deb1183ef 100644
--- a/libstdc++-v3/src/valarray-inst.cc
+++ b/libstdc++-v3/src/valarray-inst.cc
@@ -1,6 +1,7 @@
 // Explicit instantiation file.
 
-// Copyright (C) 2001, 2004, 2005 Free Software Foundation, Inc.
+// Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006
+// Free Software Foundation, Inc.
 //
 // This file is part of the GNU ISO C++ Library.  This library is free
 // software; you can redistribute it and/or modify it under the
@@ -69,40 +70,34 @@ _GLIBCXX_BEGIN_NAMESPACE(std)
                     const valarray<size_t>& __s, valarray<size_t>& __i)
   {
     // There are as much as dimensions as there are strides.
-    size_t __n = __l.size();
+    const size_t __n = __l.size();
 
-    // Get a buffer to hold current multi-index as we go through
-    // the gslice for the purpose of computing its linear-image.
-    size_t* const __t = static_cast<size_t*>
-      (__builtin_alloca(__n * sizeof (size_t)));
-    __valarray_fill(__t, __n, size_t(0));
+    // Holds current multi-index as we go through the gslice for the
+    // purpose of computing its linear-image.
+    valarray<size_t> __t(__l);
 
     // Note that this should match the product of all numbers appearing
     // in __l which describes the multidimensional sizes of the
-    // the generalized slice.
+    // generalized slice.
     const size_t __z = __i.size();
-    
+
     for (size_t __j = 0; __j < __z; ++__j)
       {
-        // Compute the linear-index image of (t_0, ... t_{n-1}).
-        // Normaly, we should use inner_product<>(), but we do it the
-        // the hard way here to avoid link-time can of worms.
-        size_t __a = __o;
-        for (size_t __k = 0; __k < __n; ++__k)
-          __a += __s[__k] * __t[__k];
+	// Compute the linear-index image of (t_0, ... t_{n-1}).
+	__i[__j] = __o;
 
-        __i[__j] = __a;
+	--__t[__n - 1];
+	__o += __s[__n - 1];
 
         // Process the next multi-index.  The loop ought to be
-        // backward since we're making a lexicagraphical visit.
-        ++__t[__n - 1];
-        for (size_t __k2 = __n - 1; __k2; --__k2)
+        // backward since we're making a lexicographical visit.
+        for (size_t __k2 = __n - 1; __k2 && !__t[__k2]; --__k2)
           {
-            if (__t[__k2] >= __l[__k2])
-              {
-                __t[__k2] = 0;
-                ++__t[__k2 - 1];
-              }
+	    __o -= __s[__k2] * __l[__k2];
+	    __t[__k2] = __l[__k2];
+
+	    --__t[__k2 - 1];
+	    __o += __s[__k2 - 1];
           }
       }
   }
diff --git a/libstdc++-v3/testsuite/performance/26_numerics/valarray_gslice_to_index.cc b/libstdc++-v3/testsuite/performance/26_numerics/valarray_gslice_to_index.cc
new file mode 100644
index 000000000000..42805ba044d8
--- /dev/null
+++ b/libstdc++-v3/testsuite/performance/26_numerics/valarray_gslice_to_index.cc
@@ -0,0 +1,57 @@
+// Copyright (C) 2006 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 2, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING.  If not, write to the Free
+// Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+// USA.
+
+// As a special exception, you may use this file as part of a free software
+// library without restriction.  Specifically, if other files instantiate
+// templates or use macros or inline functions from this file, or you compile
+// this file and link it with other files to produce an executable, this
+// file does not by itself cause the resulting executable to be covered by
+// the GNU General Public License.  This exception does not however
+// invalidate any other reasons why the executable file might be covered by
+// the GNU General Public License.
+
+#include <valarray>
+#include <testsuite_performance.h>
+
+int main()
+{
+  using namespace std;
+  using namespace __gnu_test;
+  // Performance test: subscript a large valarray with a 6-D gslice 1000 times.
+  time_counter time;
+  resource_counter resource;
+  // Source array: one million doubles, filled below so that va[i] == i.
+  valarray<double> va(1000000);
+  
+  for (int i = 0; i < 1000000; ++i)
+    va[i] = i;
+  // Six dimensions of length 10 give 10^6 index entries; strides of 1 keep each in [0, 54].
+  size_t lengthvalues[] = { 10, 10, 10, 10, 10, 10 };
+  size_t stridevalues[] = { 1, 1, 1, 1, 1, 1 };
+
+  valarray<size_t> lengths(lengthvalues, 6);
+  valarray<size_t> stride(stridevalues, 6);
+  // NOTE(review): the counters presumably bracket only this loop - confirm in testsuite_performance.h.
+  start_counters(time, resource);
+  for (int j = 0; j < 1000; ++j)
+    va[gslice(0, lengths, stride)];
+  stop_counters(time, resource);
+  report_performance(__FILE__, "", time, resource);
+  // The subscript result above is discarded on purpose; only its cost matters here.
+  return 0;
+}
-- 
GitLab