From a3895f55f7b6278ed4ce31883d75ed3094da96af Mon Sep 17 00:00:00 2001
From: Ira Rosen <irar@il.ibm.com>
Date: Tue, 12 Dec 2006 13:07:39 +0000
Subject: [PATCH] tree-vect-analyze.c (vect_analyze_data_ref_access): Add
 another check for stores with gaps.

	* tree-vect-analyze.c (vect_analyze_data_ref_access): Add another check
	for stores with gaps.
	* tree-vect-transform.c (vect_permute_store_chain): Create
	interleave_high or interleave_low according to the endianness.

From-SVN: r119779
---
 gcc/ChangeLog                                 |  7 ++
 gcc/testsuite/ChangeLog                       |  8 ++
 .../gcc.dg/vect/vect-strided-store-a-u8-i2.c  | 60 +++++++++++++++
 .../gcc.dg/vect/vect-strided-store-u16-i4.c   | 73 +++++++++++++++++++
 .../gcc.dg/vect/vect-strided-store-u32-i2.c   | 45 ++++++++++++
 .../gcc.dg/vect/vect-strided-u8-i8-gap4.c     | 20 ++++-
 gcc/tree-vect-analyze.c                       | 12 ++-
 gcc/tree-vect-transform.c                     | 34 +++++++--
 8 files changed, 249 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-strided-store-a-u8-i2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-strided-store-u16-i4.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-strided-store-u32-i2.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 61f49fa745b2..23721c401058 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2006-12-12  Ira Rosen  <irar@il.ibm.com>
+
+	* tree-vect-analyze.c (vect_analyze_data_ref_access): Add another check
+	for stores with gaps.
+	* tree-vect-transform.c (vect_permute_store_chain): Create
+	interleave_high or interleave_low according to the endianness.
+
 2006-12-12  Richard Guenther  <rguenther@suse.de>
 
 	PR middle-end/30147
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 8e8c438739ea..724bd05c8277 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,11 @@
+2006-12-12  Ira Rosen  <irar@il.ibm.com>
+
+	* gcc.dg/vect/vect-strided-store-u32-i2.c: New test.
+	* gcc.dg/vect/vect-strided-store-a-u8-i2.c: New test.
+	* gcc.dg/vect/vect-strided-store-u16-i4.c: New test.
+	* gcc.dg/vect/vect-strided-u8-i8-gap4.c: Add a case of stores with
+	gaps.
+
 2006-12-12  Richard Guenther  <rguenther@suse.de>
 
 	PR middle-end/30147
diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-store-a-u8-i2.c b/gcc/testsuite/gcc.dg/vect/vect-strided-store-a-u8-i2.c
new file mode 100644
index 000000000000..e7125b1458d2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-strided-store-a-u8-i2.c
@@ -0,0 +1,60 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 64 
+
+typedef struct {
+   unsigned char a;
+   unsigned char b;
+} s;
+
+int
+main1 ()
+{
+  s arr[N];
+  s *ptr = arr;
+  s res[N];
+  int i;
+  unsigned char a[N], b[N];
+
+
+  for (i = 0; i < N; i++)
+    {
+      a[i] = i;
+      b[i] = i * 2;
+      if (a[i] == 178)
+         abort();
+    }
+
+  for (i = 0; i < N; i++)
+    {
+      res[i].a = a[i] + 3;
+      res[i].b = a[i] + b[i];
+      ptr++;
+    }
+
+  /* check results:  */
+  for (i = 0; i < N; i++)
+    {
+      if (res[i].a != a[i] + 3
+          || res[i].b != a[i] + b[i])
+        abort ();
+    }
+
+  return 0;
+}
+
+int main (void)
+{
+  check_vect ();
+
+  main1 ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target { vect_interleave } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-store-u16-i4.c b/gcc/testsuite/gcc.dg/vect/vect-strided-store-u16-i4.c
new file mode 100644
index 000000000000..629ab79b7c31
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-strided-store-u16-i4.c
@@ -0,0 +1,73 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 128 
+
+typedef struct {
+   unsigned short a;
+   unsigned short b;
+   unsigned short c;
+   unsigned short d;
+} s;
+
+unsigned short a[N];
+unsigned short b[N];
+unsigned short c[N];
+
+int
+main1 (s *arr)
+{
+  int i;
+  s *ptr = arr;
+  s res[N];
+  unsigned short x, y, z, w;
+
+  for (i = 0; i < N; i++)
+    {
+      res[i].c = a[i];
+      res[i].a = b[i];
+      res[i].d = c[i];
+      res[i].b = a[i] + b [i];
+      ptr++;
+    }
+
+  /* check results:  */
+  for (i = 0; i < N; i++)
+    {
+      if (res[i].c != a[i]
+          || res[i].a != b[i]
+          || res[i].d != c[i]
+          || res[i].b != a[i] + b[i])
+        abort ();
+    }
+
+  return 0;
+}
+
+int main (void)
+{
+  int i;
+  s arr[N];
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+    {
+      a[i] = i;
+      b[i] = i * 2;
+      c[i] = 17;
+      if (a[i] == 178)
+         abort();
+    }
+
+  main1 (arr);
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target { vect_interleave } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-store-u32-i2.c b/gcc/testsuite/gcc.dg/vect/vect-strided-store-u32-i2.c
new file mode 100644
index 000000000000..ec810985b154
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-strided-store-u32-i2.c
@@ -0,0 +1,45 @@
+/* { dg-require-effective-target vect_float } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 16
+
+int
+main1 (void)
+{
+  int i;
+  int a[N*2];
+  int b[N] = {0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30};
+  int c[N] = {1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31};
+
+  /* Strided access pattern.  */
+  for (i = 0; i < N/2; i++)
+    {
+      a[i*2] = b[i] + c[i];
+      a[i*2+1] = b[i] * c[i];
+    }
+
+  /* Check results.  */
+  for (i = 0; i < N/2; i++)
+    {
+      if (a[i*2] != b[i] + c[i]
+          || a[i*2+1] != b[i] * c[i])
+        abort();
+    }
+
+  return 0;
+}
+
+int main (void)
+{
+  check_vect ();
+  return main1 ();
+}
+
+/* Needs interleaving support.  */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail { vect_interleave } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4.c b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4.c
index c176b3264f4c..75691467e9ae 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4.c
@@ -49,8 +49,26 @@ main1 (s *arr)
           || res[i].e != arr[i].b + arr[i].e
           || res[i].h != arr[i].c
           || res[i].g != arr[i].b + arr[i].c)
-          abort();
+          abort ();
    }
+
+  ptr = arr;
+  /* Not vectorizable: gap in store.  */
+  for (i = 0; i < N; i++)
+    { 
+      res[i].a = ptr->b;
+      res[i].b = ptr->c;
+      ptr++; 
+    }
+  
+  /* Check results.  */
+  for (i = 0; i < N; i++)
+    {
+      if (res[i].a != arr[i].b 
+	  || res[i].b != arr[i].c)
+          abort ();
+    }
+
 }
 
 
diff --git a/gcc/tree-vect-analyze.c b/gcc/tree-vect-analyze.c
index 7506150b6ad2..c8b2bf858d6e 100644
--- a/gcc/tree-vect-analyze.c
+++ b/gcc/tree-vect-analyze.c
@@ -1804,7 +1804,8 @@ vect_analyze_data_ref_access (struct data_reference *dr)
       /* COUNT is the number of accesses found, we multiply it by the size of 
 	 the type to get COUNT_IN_BYTES.  */
       count_in_bytes = type_size * count;
-      /* Check the size of the interleaving is not greater than STEP.  */
+
+      /* Check that the size of the interleaving is not greater than STEP.  */
       if (dr_step < count_in_bytes) 
 	{
 	  if (vect_print_dump_info (REPORT_DETAILS))
@@ -1815,6 +1816,15 @@ vect_analyze_data_ref_access (struct data_reference *dr)
 	  return false;
 	}
 
+      /* Check that the size of the interleaving is equal to STEP for stores, 
+         i.e., that there are no gaps.  */ 
+      if (!DR_IS_READ (dr) && dr_step != count_in_bytes) 
+	{
+	  if (vect_print_dump_info (REPORT_DETAILS))
+	    fprintf (vect_dump, "interleaved store with gaps");
+	  return false;
+	}
+
       /* Check that STEP is a multiple of type size.  */
       if ((dr_step % type_size) != 0)
 	{
diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c
index 1be768953d1f..f83d92a39ff2 100644
--- a/gcc/tree-vect-transform.c
+++ b/gcc/tree-vect-transform.c
@@ -2592,23 +2592,41 @@ vect_permute_store_chain (VEC(tree,heap) *dr_chain,
 	  vect1 = VEC_index (tree, dr_chain, j);
 	  vect2 = VEC_index (tree, dr_chain, j+length/2);
 
-	  /* high = interleave_high (vect1, vect2);  */
+	  /* Create interleaving stmt:
+	     in the case of big endian: 
+                                high = interleave_high (vect1, vect2) 
+             and in the case of little endian: 
+                                high = interleave_low (vect1, vect2).  */
 	  perm_dest = create_tmp_var (vectype, "vect_inter_high");
 	  add_referenced_var (perm_dest);
-	  perm_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, perm_dest,
-			      build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype, vect1, 
-				      vect2));
+          if (BYTES_BIG_ENDIAN)
+	    perm_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, perm_dest,
+	        		build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype, 
+                                        vect1, vect2)); 
+	  else
+            perm_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, perm_dest,
+                                build2 (VEC_INTERLEAVE_LOW_EXPR, vectype, 
+                                        vect1, vect2));
 	  high = make_ssa_name (perm_dest, perm_stmt);
 	  GIMPLE_STMT_OPERAND (perm_stmt, 0) = high;
 	  vect_finish_stmt_generation (stmt, perm_stmt, bsi);
 	  VEC_replace (tree, *result_chain, 2*j, high);
 
-	  /* low = interleave_low (vect1, vect2);  */
+	  /* Create interleaving stmt:
+             in the case of big endian:
+                               low  = interleave_low (vect1, vect2) 
+             and in the case of little endian:
+                               low  = interleave_high (vect1, vect2).  */     
 	  perm_dest = create_tmp_var (vectype, "vect_inter_low");
 	  add_referenced_var (perm_dest);
-	  perm_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, perm_dest,
-			      build2 (VEC_INTERLEAVE_LOW_EXPR, vectype, vect1, 
-				      vect2));
+	  if (BYTES_BIG_ENDIAN)
+	    perm_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, perm_dest,
+	               	        build2 (VEC_INTERLEAVE_LOW_EXPR, vectype, 
+ 					vect1, vect2));
+	  else
+            perm_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, perm_dest,
+                                build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype, 
+                                        vect1, vect2));
 	  low = make_ssa_name (perm_dest, perm_stmt);
 	  GIMPLE_STMT_OPERAND (perm_stmt, 0) = low;
 	  vect_finish_stmt_generation (stmt, perm_stmt, bsi);
-- 
GitLab