From c9ba330781f41dd3e35d1c775a3a3eeaa0b4c61e Mon Sep 17 00:00:00 2001 From: Jakub Jelinek <jakub@redhat.com> Date: Thu, 28 Jun 2012 19:52:51 +0200 Subject: [PATCH] re PR tree-optimization/53645 (Missed optimization for vector integer division lowering) PR tree-optimization/53645 * tree-vect-generic.c (expand_vector_divmod): Use MULT_HIGHPART_EXPR instead of VEC_WIDEN_MULT_{HI,LO}_EXPR followed by VEC_PERM_EXPR if possible. * gcc.c-torture/execute/pr53645-2.c: New test. From-SVN: r189052 --- gcc/ChangeLog | 7 + gcc/testsuite/ChangeLog | 5 + .../gcc.c-torture/execute/pr53645-2.c | 120 ++++++++++++++++++ gcc/tree-vect-generic.c | 89 +++++++------ 4 files changed, 184 insertions(+), 37 deletions(-) create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr53645-2.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 04a35102a38b..86d0176d3993 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,10 @@ +2012-06-28 Jakub Jelinek <jakub@redhat.com> + + PR tree-optimization/53645 + * tree-vect-generic.c (expand_vector_divmod): Use MULT_HIGHPART_EXPR + instead of VEC_WIDEN_MULT_{HI,LO}_EXPR followed by VEC_PERM_EXPR + if possible. + 2012-06-28 Georg-Johann Lay <avr@gjlay.de> PR 53595 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 406bd1f5dcf7..20e38a0a99d0 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2012-06-28 Jakub Jelinek <jakub@redhat.com> + + PR tree-optimization/53645 + * gcc.c-torture/execute/pr53645-2.c: New test. + 2012-06-28 Richard Guenther <rguenther@suse.de> PR middle-end/53790 diff --git a/gcc/testsuite/gcc.c-torture/execute/pr53645-2.c b/gcc/testsuite/gcc.c-torture/execute/pr53645-2.c new file mode 100644 index 000000000000..a03dd2ef8dc2 --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/execute/pr53645-2.c @@ -0,0 +1,120 @@ +/* PR tree-optimization/53645 */ + +typedef unsigned short int UV __attribute__((vector_size (16))); +typedef short int SV __attribute__((vector_size (16))); +extern void abort (void); + +#define TEST(a, b, c, d, e, f, g, h) \ +__attribute__((noinline)) void \ +uq##a##b##c##d##e##f##g##h (UV *x, UV *y) \ +{ \ + *x = *y / ((UV) { a, b, c, d, e, f, g, h }); \ +} \ + \ +__attribute__((noinline)) void \ +ur##a##b##c##d##e##f##g##h (UV *x, UV *y) \ +{ \ + *x = *y % ((UV) { a, b, c, d, e, f, g, h }); \ +} \ + \ +__attribute__((noinline)) void \ +sq##a##b##c##d##e##f##g##h (SV *x, SV *y) \ +{ \ + *x = *y / ((SV) { a, b, c, d, e, f, g, h }); \ +} \ + \ +__attribute__((noinline)) void \ +sr##a##b##c##d##e##f##g##h (SV *x, SV *y) \ +{ \ + *x = *y % ((SV) { a, b, c, d, e, f, g, h }); \ +} + +#define TESTS \ +TEST (4, 4, 4, 4, 4, 4, 4, 4) \ +TEST (1, 4, 2, 8, 16, 64, 32, 128) \ +TEST (3, 3, 3, 3, 3, 3, 3, 3) \ +TEST (6, 5, 6, 5, 6, 5, 6, 5) \ +TEST (14, 14, 14, 6, 14, 6, 14, 14) \ +TEST (7, 7, 7, 7, 7, 7, 7, 7) \ + +TESTS + +UV u[] = + { ((UV) { 73U, 65531U, 0U, 174U, 921U, 65535U, 17U, 178U }), + ((UV) { 1U, 8173U, 65535U, 65472U, 12U, 29612U, 128U, 8912U }) }; +SV s[] = + { ((SV) { 73, -9123, 32761, 8191, 16371, 1201, 12701, 9999 }), + ((SV) { 9903, -1, -7323, 0, -7, -323, 9124, -9199 }) }; + +int +main () +{ + UV ur, ur2; + SV sr, sr2; + int i; +#undef TEST +#define TEST(a, b, c, d, e, f, g, h) \ + uq##a##b##c##d##e##f##g##h (&ur, u + i); \ + if (ur[0] != u[i][0] / a || ur[3] != u[i][3] / d) \ + abort (); \ + asm volatile ("" : : "r" (&ur) : "memory"); \ + if (ur[2] != u[i][2] / c || ur[1] != u[i][1] / b) \ + abort (); \ + asm volatile ("" : : "r" (&ur) : "memory"); \ + if (ur[4] != u[i][4] / e || ur[7] != u[i][7] / h) \ + abort (); \ + asm volatile ("" : : "r" (&ur) : "memory"); \ + if (ur[6] != u[i][6] / g || ur[5] != u[i][5] / f) \ + abort (); \ + asm volatile ("" : : "r" (&ur) : "memory"); \ + ur##a##b##c##d##e##f##g##h (&ur, u + i); \ + if (ur[0] != u[i][0] % a || ur[3] != u[i][3] % d) \ + abort (); \ + asm volatile ("" : : "r" (&ur) : "memory"); \ + if (ur[2] != u[i][2] % c || ur[1] != u[i][1] % b) \ + abort (); \ + asm volatile ("" : : "r" (&ur) : "memory"); \ + if (ur[4] != u[i][4] % e || ur[7] != u[i][7] % h) \ + abort (); \ + asm volatile ("" : : "r" (&ur) : "memory"); \ + if (ur[6] != u[i][6] % g || ur[5] != u[i][5] % f) \ + abort (); \ + asm volatile ("" : : "r" (&ur) : "memory"); + for (i = 0; i < sizeof (u) / sizeof (u[0]); i++) + { + TESTS + } +#undef TEST +#define TEST(a, b, c, d, e, f, g, h) \ + sq##a##b##c##d##e##f##g##h (&sr, s + i); \ + if (sr[0] != s[i][0] / a || sr[3] != s[i][3] / d) \ + abort (); \ + asm volatile ("" : : "r" (&sr) : "memory"); \ + if (sr[2] != s[i][2] / c || sr[1] != s[i][1] / b) \ + abort (); \ + asm volatile ("" : : "r" (&sr) : "memory"); \ + if (sr[4] != s[i][4] / e || sr[7] != s[i][7] / h) \ + abort (); \ + asm volatile ("" : : "r" (&sr) : "memory"); \ + if (sr[6] != s[i][6] / g || sr[5] != s[i][5] / f) \ + abort (); \ + asm volatile ("" : : "r" (&sr) : "memory"); \ + sr##a##b##c##d##e##f##g##h (&sr, s + i); \ + if (sr[0] != s[i][0] % a || sr[3] != s[i][3] % d) \ + abort (); \ + asm volatile ("" : : "r" (&sr) : "memory"); \ + if (sr[2] != s[i][2] % c || sr[1] != s[i][1] % b) \ + abort (); \ + asm volatile ("" : : "r" (&sr) : "memory"); \ + if (sr[4] != s[i][4] % e || sr[7] != s[i][7] % h) \ + abort (); \ + asm volatile ("" : : "r" (&sr) : "memory"); \ + if (sr[6] != s[i][6] % g || sr[5] != s[i][5] % f) \ + abort (); \ + asm volatile ("" : : "r" (&sr) : "memory"); + for (i = 0; i < sizeof (s) / sizeof (s[0]); i++) + { + TESTS + } + return 0; +} diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c index 1b3ff274fd03..c83db5e1e304 100644 --- a/gcc/tree-vect-generic.c +++ b/gcc/tree-vect-generic.c @@ -455,7 +455,7 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0, unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type))); optab op; tree *vec; - unsigned char *sel; + unsigned char *sel = NULL; tree cur_op, mhi, mlo, mulcst, perm_mask, wider_type, tem; if (prec > HOST_BITS_PER_WIDE_INT) @@ -744,26 +744,34 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0, if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN) return NULL_TREE; - op = optab_for_tree_code (VEC_WIDEN_MULT_LO_EXPR, type, optab_default); - if (op == NULL - || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing) - return NULL_TREE; - op = optab_for_tree_code (VEC_WIDEN_MULT_HI_EXPR, type, optab_default); - if (op == NULL - || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing) - return NULL_TREE; - sel = XALLOCAVEC (unsigned char, nunits); - for (i = 0; i < nunits; i++) - sel[i] = 2 * i + (BYTES_BIG_ENDIAN ? 0 : 1); - if (!can_vec_perm_p (TYPE_MODE (type), false, sel)) - return NULL_TREE; - wider_type - = build_vector_type (build_nonstandard_integer_type (prec * 2, unsignedp), - nunits / 2); - if (GET_MODE_CLASS (TYPE_MODE (wider_type)) != MODE_VECTOR_INT - || GET_MODE_BITSIZE (TYPE_MODE (wider_type)) - != GET_MODE_BITSIZE (TYPE_MODE (type))) - return NULL_TREE; + op = optab_for_tree_code (MULT_HIGHPART_EXPR, type, optab_default); + if (op != NULL + && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing) + wider_type = NULL_TREE; + else + { + op = optab_for_tree_code (VEC_WIDEN_MULT_LO_EXPR, type, optab_default); + if (op == NULL + || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing) + return NULL_TREE; + op = optab_for_tree_code (VEC_WIDEN_MULT_HI_EXPR, type, optab_default); + if (op == NULL + || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing) + return NULL_TREE; + sel = XALLOCAVEC (unsigned char, nunits); + for (i = 0; i < nunits; i++) + sel[i] = 2 * i + (BYTES_BIG_ENDIAN ? 0 : 1); + if (!can_vec_perm_p (TYPE_MODE (type), false, sel)) + return NULL_TREE; + wider_type + = build_vector_type (build_nonstandard_integer_type (prec * 2, + unsignedp), + nunits / 2); + if (GET_MODE_CLASS (TYPE_MODE (wider_type)) != MODE_VECTOR_INT + || GET_MODE_BITSIZE (TYPE_MODE (wider_type)) + != GET_MODE_BITSIZE (TYPE_MODE (type))) + return NULL_TREE; + } cur_op = op0; @@ -772,7 +780,7 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0, case 0: gcc_assert (unsignedp); /* t1 = oprnd0 >> pre_shift; - t2 = (type) (t1 w* ml >> prec); + t2 = t1 h* ml; q = t2 >> post_shift; */ cur_op = add_rshift (gsi, type, cur_op, pre_shifts); if (cur_op == NULL_TREE) @@ -801,30 +809,37 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0, for (i = 0; i < nunits; i++) vec[i] = build_int_cst (TREE_TYPE (type), mulc[i]); mulcst = build_vector (type, vec); - for (i = 0; i < nunits; i++) - vec[i] = build_int_cst (TREE_TYPE (type), sel[i]); - perm_mask = build_vector (type, vec); - mhi = gimplify_build2 (gsi, VEC_WIDEN_MULT_HI_EXPR, wider_type, - cur_op, mulcst); - mhi = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, mhi); - mlo = gimplify_build2 (gsi, VEC_WIDEN_MULT_LO_EXPR, wider_type, - cur_op, mulcst); - mlo = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, mlo); - if (BYTES_BIG_ENDIAN) - cur_op = gimplify_build3 (gsi, VEC_PERM_EXPR, type, mhi, mlo, perm_mask); + if (wider_type == NULL_TREE) + cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst); else - cur_op = gimplify_build3 (gsi, VEC_PERM_EXPR, type, mlo, mhi, perm_mask); + { + for (i = 0; i < nunits; i++) + vec[i] = build_int_cst (TREE_TYPE (type), sel[i]); + perm_mask = build_vector (type, vec); + mhi = gimplify_build2 (gsi, VEC_WIDEN_MULT_HI_EXPR, wider_type, + cur_op, mulcst); + mhi = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, mhi); + mlo = gimplify_build2 (gsi, VEC_WIDEN_MULT_LO_EXPR, wider_type, + cur_op, mulcst); + mlo = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, mlo); + if (BYTES_BIG_ENDIAN) + cur_op = gimplify_build3 (gsi, VEC_PERM_EXPR, type, mhi, mlo, + perm_mask); + else + cur_op = gimplify_build3 (gsi, VEC_PERM_EXPR, type, mlo, mhi, + perm_mask); + } switch (mode) { case 0: /* t1 = oprnd0 >> pre_shift; - t2 = (type) (t1 w* ml >> prec); + t2 = t1 h* ml; q = t2 >> post_shift; */ cur_op = add_rshift (gsi, type, cur_op, post_shifts); break; case 1: - /* t1 = (type) (oprnd0 w* ml >> prec); + /* t1 = oprnd0 h* ml; t2 = oprnd0 - t1; t3 = t2 >> 1; t4 = t1 + t3; @@ -848,7 +863,7 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0, case 3: case 4: case 5: - /* t1 = (type) (oprnd0 w* ml >> prec); + /* t1 = oprnd0 h* ml; t2 = t1; [ iff (mode & 2) != 0 ] t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ] t3 = t2 >> post_shift; -- GitLab