diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index f9850320f61c5ddccf47e6583d304e5f405a484f..9858de6b171cc320301092a41e33910de3366ecc 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -16077,6 +16077,15 @@ private: leaving a vectorization of { elts }. */ bool m_stores_to_vector_load_decl = false; + /* Non-zero if the last operation we costed is a vector promotion or demotion. + In this case the value is the number of insns in the last operation. + + On AArch64 vector promotions and demotions require us to first widen or + narrow the input and only after that emit conversion instructions. For + costing this means we need to emit the cost of the final conversions as + well. */ + unsigned int m_num_last_promote_demote = 0; + /* - If M_VEC_FLAGS is zero then we're costing the original scalar code. - If M_VEC_FLAGS & VEC_ADVSIMD is nonzero then we're costing Advanced SIMD code. @@ -17132,6 +17141,29 @@ aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, stmt_cost = aarch64_sve_adjust_stmt_cost (m_vinfo, kind, stmt_info, vectype, stmt_cost); + /* Vector promotion and demotion require us to widen the operation first + and only after that perform the conversion. Unfortunately the mid-end + expects this to be doable as a single operation and doesn't pass on + enough context here for us to tell which operation is happening. To + account for this we count every promote-demote operation twice and if + the previously costed operation was also a promote-demote we reduce + the cost of the operation currently being costed to simulate the final + conversion cost. Note that for SVE we can do better here if the converted + value comes from a load since the widening load would consume the widening + operations. However since we're in stage 3 we can't change the helper + vect_is_extending_load and duplicating the code seems not useful. 
*/ + gassign *assign = NULL; + if (kind == vec_promote_demote && stmt_info + && (assign = dyn_cast <gassign *> (STMT_VINFO_STMT (stmt_info))) + && gimple_assign_rhs_code (assign) == FLOAT_EXPR) + { + auto new_count = count * 2 - m_num_last_promote_demote; + m_num_last_promote_demote = count; + count = new_count; + } + else + m_num_last_promote_demote = 0; + if (stmt_info && aarch64_use_new_vector_costs_p ()) { /* Account for any extra "embedded" costs that apply additively diff --git a/gcc/testsuite/gcc.target/aarch64/pr110625_4.c b/gcc/testsuite/gcc.target/aarch64/pr110625_4.c new file mode 100644 index 0000000000000000000000000000000000000000..34dac19d81a85d63706d54f4cb0c738ce592d5d7 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/pr110625_4.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -mcpu=neoverse-n2 -fdump-tree-vect-details" } */ + +typedef struct { + short blue, green, red, opacity; +} Pixel; + +double foo (long n, double *k, Pixel *k_pixels) { + double result_2, result_1, result_0; + for (; n; n++, k--) { + result_0 += *k * k_pixels[n].red; + result_1 += *k * k_pixels[n].green; + result_2 += *k * k_pixels[n].blue; + } + return result_0 + result_1 + result_2; +} + +/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpack_fcvt_signed_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpack_fcvt_signed_1.c index 0f96dc2ff007340541c2ba7d51e1ccfa0f3f2d39..4c5e88657408f61156035012212ed542fac45efb 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/unpack_fcvt_signed_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpack_fcvt_signed_1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize -fno-inline" } */ +/* { dg-options "-O2 -ftree-vectorize -fno-inline --param aarch64-sve-compare-costs=0" } */ #include <stdint.h> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpack_fcvt_unsigned_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpack_fcvt_unsigned_1.c index 
70465f91eba4f80140b2059481eb8f06bbc9ace7..3ff2bd127756b2ff08095513b09325db4779ba02 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/unpack_fcvt_unsigned_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpack_fcvt_unsigned_1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -ftree-vectorize" } */ +/* { dg-options "-O2 -ftree-vectorize --param aarch64-sve-compare-costs=0" } */ #include <stdint.h>