Vectorizer: take the access offset into account when computing the known
alignment / misalignment of reverse contiguous group accesses (PR118558).

Summary of the hunks below:

  * gcc/testsuite/gcc.dg/vect/pr118558.c (new): runtime test.  The outer
    loop counts g_1168 down from 3 to 0 and loads g_270[g_1168][0]; the
    test aborts unless the last value loaded is g_270[0][0] == 123.

  * gcc/tree-vect-stmts.cc (get_group_load_store_type, vectorizable_load):
    when the memory access type is VMAT_CONTIGUOUS_REVERSE, compute
    off = (TYPE_VECTOR_SUBPARTS (vectype) - 1) * -dr_size and pass it to
    vect_known_alignment_in_bytes / dr_misalignment instead of querying
    them without an offset.  The scalar DR size is computed once into
    dr_size and reused.
    NOTE(review): presumably OFF is the byte offset of the first element
    actually touched by the reversed vector access - confirm against
    dr_misalignment's contract.

  * gcc/tree-vectorizer.h (vect_known_alignment_in_bytes): new optional
    parameter "poly_int64 offset = 0", forwarded to dr_misalignment, so
    existing two-argument callers keep compiling unchanged.

NOTE(review): leading whitespace of the context lines was reconstructed
from GNU formatting conventions; apply with whitespace-tolerant options
(git apply --ignore-whitespace / patch -l) if a strict apply rejects it.

diff --git a/gcc/testsuite/gcc.dg/vect/pr118558.c b/gcc/testsuite/gcc.dg/vect/pr118558.c
new file mode 100644
index 0000000000000000000000000000000000000000..5483328d686bf1f6a5a9ea8cdb327c51a35668fc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr118558.c
@@ -0,0 +1,15 @@
+#include "tree-vect.h"
+
+static unsigned long g_270[5][2] = {{123}};
+static short g_2312 = 0;
+int main()
+{
+  check_vect ();
+  int g_1168 = 0;
+  unsigned t = 4;
+  for (g_1168 = 3; g_1168 >= 0; g_1168 -= 1)
+    for (g_2312 = 0; g_2312 <= 1; g_2312 += 1)
+      t = g_270[g_1168][0];
+  if (t != 123) __builtin_abort();
+}
+
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 21fb5cf5bd47ad9e37762909c6103adbf8752e2a..c0550acf6b2b231d1800a331a352668a3daf3d10 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -2198,14 +2198,20 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
 				 " non-consecutive accesses\n");
 	      return false;
 	    }
+
+	  unsigned HOST_WIDE_INT dr_size
+	    = vect_get_scalar_dr_size (first_dr_info);
+	  poly_int64 off = 0;
+	  if (*memory_access_type == VMAT_CONTIGUOUS_REVERSE)
+	    off = (TYPE_VECTOR_SUBPARTS (vectype) - 1) * -dr_size;
+
 	  /* An overrun is fine if the trailing elements are smaller
 	     than the alignment boundary B.  Every vector access will
 	     be a multiple of B and so we are guaranteed to access a
 	     non-gap element in the same B-sized block.  */
 	  if (overrun_p
 	      && gap < (vect_known_alignment_in_bytes (first_dr_info,
-						       vectype)
-			/ vect_get_scalar_dr_size (first_dr_info)))
+						       vectype, off) / dr_size))
 	    overrun_p = false;
 
 	  /* When we have a contiguous access across loop iterations
@@ -2230,7 +2236,7 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
 	     by simply loading half of the vector only.  Usually
 	     the construction with an upper zero half will be elided.  */
 	  dr_alignment_support alss;
-	  int misalign = dr_misalignment (first_dr_info, vectype);
+	  int misalign = dr_misalignment (first_dr_info, vectype, off);
 	  tree half_vtype;
 	  poly_uint64 remain;
 	  unsigned HOST_WIDE_INT tem, num;
@@ -11991,8 +11997,14 @@ vectorizable_load (vec_info *vinfo,
 		      tree ltype = vectype;
 		      tree new_vtype = NULL_TREE;
 		      unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
+		      unsigned HOST_WIDE_INT dr_size
+			= vect_get_scalar_dr_size (first_dr_info);
+		      poly_int64 off = 0;
+		      if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
+			off = (TYPE_VECTOR_SUBPARTS (vectype) - 1) * -dr_size;
 		      unsigned int vect_align
-			= vect_known_alignment_in_bytes (first_dr_info, vectype);
+			= vect_known_alignment_in_bytes (first_dr_info, vectype,
+							 off);
 		      /* Try to use a single smaller load when we are about
 			 to load excess elements compared to the unrolled
 			 scalar loop.  */
@@ -12013,9 +12025,7 @@ vectorizable_load (vec_info *vinfo,
 			       scalar loop.  */
 			    ;
 			  else if (known_gt (vect_align,
-					     ((nunits - remain)
-					      * vect_get_scalar_dr_size
-						  (first_dr_info))))
+					     ((nunits - remain) * dr_size)))
 			    /* Aligned access to the gap area when there's
 			       at least one element in it is OK.  */
 			    ;
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 79db02a39a8f7d100d4dce18ff9a7a60c87db20b..44d3a1d46c409597f1e67a275211a1da414fc7c7 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2028,9 +2028,10 @@ known_alignment_for_access_p (dr_vec_info *dr_info, tree vectype)
    of DR_INFO is guaranteed to have.  */
 
 inline unsigned int
-vect_known_alignment_in_bytes (dr_vec_info *dr_info, tree vectype)
+vect_known_alignment_in_bytes (dr_vec_info *dr_info, tree vectype,
+			       poly_int64 offset = 0)
 {
-  int misalignment = dr_misalignment (dr_info, vectype);
+  int misalignment = dr_misalignment (dr_info, vectype, offset);
   if (misalignment == DR_MISALIGNMENT_UNKNOWN)
     return TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr_info->dr)));
   else if (misalignment == 0)