diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index 441450a9c0ba86333e341b7f017aba1845d503c4..b4396837c2468e1f43b4ffda4eceb43a34c2e7eb 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -2305,33 +2305,47 @@ public:
       return f.fold_active_lanes_to (build_zero_cst (TREE_TYPE (f.lhs)));
 
     /* If one of the operands is all integer -1, fold to svneg.  */
-    tree pg = gimple_call_arg (f.call, 0);
-    tree negated_op = NULL;
-    if (integer_minus_onep (op2))
-      negated_op = op1;
-    else if (integer_minus_onep (op1))
-      negated_op = op2;
-    if (!f.type_suffix (0).unsigned_p && negated_op)
+    if (integer_minus_onep (op1) || integer_minus_onep (op2))
       {
-        function_instance instance ("svneg", functions::svneg, shapes::unary,
-                                    MODE_none, f.type_suffix_ids, GROUP_none,
-                                    f.pred, FPM_unused);
-        gcall *call = f.redirect_call (instance);
-        unsigned offset_index = 0;
-        if (f.pred == PRED_m)
+        auto mul_by_m1 = [](gimple_folder &f, tree lhs_conv,
+                            vec<tree> &args_conv) -> gimple *
           {
-            offset_index = 1;
-            gimple_call_set_arg (call, 0, op1);
-          }
-        else
-          gimple_set_num_ops (call, 5);
-        gimple_call_set_arg (call, offset_index, pg);
-        gimple_call_set_arg (call, offset_index + 1, negated_op);
-        return call;
+            gcc_assert (lhs_conv && args_conv.length () == 3);
+            tree pg = args_conv[0];
+            tree op1 = args_conv[1];
+            tree op2 = args_conv[2];
+            tree negated_op = op1;
+            if (integer_minus_onep (op1))
+              negated_op = op2;
+            type_suffix_pair signed_tsp =
+              {find_type_suffix (TYPE_signed, f.type_suffix (0).element_bits),
+               f.type_suffix_ids[1]};
+            function_instance instance ("svneg", functions::svneg,
+                                        shapes::unary, MODE_none, signed_tsp,
+                                        GROUP_none, f.pred, FPM_unused);
+            gcall *call = f.redirect_call (instance);
+            gimple_call_set_lhs (call, lhs_conv);
+            unsigned offset = 0;
+            if (f.pred == PRED_m)
+              {
+                offset = 1;
+                gimple_call_set_arg (call, 0, op1);
+              }
+            else
+              gimple_set_num_ops (call, 5);
+            gimple_call_set_arg (call, offset, pg);
+            gimple_call_set_arg (call, offset + 1, negated_op);
+            return call;
+          };
+        tree ty =
+          get_vector_type (find_type_suffix (TYPE_signed,
+                                             f.type_suffix (0).element_bits));
+        return f.convert_and_fold (ty, mul_by_m1);
       }
 
     /* If one of the operands is a uniform power of 2, fold to a left shift
       by immediate.  */
+    tree pg = gimple_call_arg (f.call, 0);
     tree op1_cst = uniform_integer_cst_p (op1);
     tree op2_cst = uniform_integer_cst_p (op2);
     tree shift_op1, shift_op2 = NULL;
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index 676e862c0f79e4b1b143a76b32154ab047e4dd6f..cf8ca89aefa2182f9d6f9bada03f6c526192ed5e 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -1130,14 +1130,6 @@ num_vectors_to_group (unsigned int nvectors)
   gcc_unreachable ();
 }
 
-/* Return the vector type associated with TYPE.  */
-static tree
-get_vector_type (sve_type type)
-{
-  auto vector_type = type_suffixes[type.type].vector_type;
-  return acle_vector_types[type.num_vectors - 1][vector_type];
-}
-
 /* If FNDECL is an SVE builtin, return its function instance, otherwise return
    null.  */
 const function_instance *
@@ -3601,6 +3593,7 @@ gimple_folder::redirect_call (const function_instance &instance)
     return NULL;
 
   gimple_call_set_fndecl (call, rfn->decl);
+  gimple_call_set_fntype (call, TREE_TYPE (rfn->decl));
   return call;
 }
 
@@ -3675,6 +3668,46 @@ gimple_folder::fold_pfalse ()
   return nullptr;
 }
 
+/* Convert the lhs and all non-boolean vector-type operands to TYPE.
+   Pass the converted variables to the callback FP, and finally convert the
+   result back to the original type.  Add the necessary conversion statements.
+   Return the new call.  */
+gimple *
+gimple_folder::convert_and_fold (tree type,
+                                 gimple *(*fp) (gimple_folder &,
+                                                tree, vec<tree> &))
+{
+  gcc_assert (VECTOR_TYPE_P (type)
+              && TYPE_MODE (type) != VNx16BImode);
+  tree old_ty = TREE_TYPE (lhs);
+  gimple_seq stmts = NULL;
+  bool convert_lhs_p = !useless_type_conversion_p (type, old_ty);
+  tree lhs_conv = convert_lhs_p ? create_tmp_var (type) : lhs;
+  unsigned int num_args = gimple_call_num_args (call);
+  auto_vec<tree, 16> args_conv;
+  args_conv.safe_grow (num_args);
+  for (unsigned int i = 0; i < num_args; ++i)
+    {
+      tree op = gimple_call_arg (call, i);
+      tree op_ty = TREE_TYPE (op);
+      args_conv[i] =
+        (VECTOR_TYPE_P (op_ty)
+         && TYPE_MODE (op_ty) != VNx16BImode
+         && !useless_type_conversion_p (op_ty, type))
+        ? gimple_build (&stmts, VIEW_CONVERT_EXPR, type, op) : op;
+    }
+
+  gimple *new_stmt = fp (*this, lhs_conv, args_conv);
+  gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+  if (convert_lhs_p)
+    {
+      tree t = build1 (VIEW_CONVERT_EXPR, old_ty, lhs_conv);
+      gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, t);
+      gsi_insert_after (gsi, g, GSI_SAME_STMT);
+    }
+  return new_stmt;
+}
+
 /* Fold the call to constant VAL.  */
 gimple *
 gimple_folder::fold_to_cstu (poly_uint64 val)
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h
index eb623d5de08a8c445a188a76f75e45ad7529b968..54d213dfe6e0e1cd95e932fc4a04e9cd360f15f5 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins.h
@@ -649,6 +649,8 @@ public:
   gcall *redirect_call (const function_instance &);
   gimple *redirect_pred_x ();
   gimple *fold_pfalse ();
+  gimple *convert_and_fold (tree, gimple *(*) (gimple_folder &,
+                                               tree, vec<tree> &));
 
   gimple *fold_to_cstu (poly_uint64);
   gimple *fold_to_pfalse ();
@@ -894,6 +896,14 @@ tuple_type_field (tree type)
   gcc_unreachable ();
 }
 
+/* Return the vector type associated with TYPE.  */
+inline tree
+get_vector_type (sve_type type)
+{
+  auto vector_type = type_suffixes[type.type].vector_type;
+  return acle_vector_types[type.num_vectors - 1][vector_type];
+}
+
 inline function_instance::
 function_instance (const char *base_name_in, const function_base *base_in,
                    const function_shape *shape_in,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u16.c
index bdf6fcb98d6be4564d6e3cd24fe237fbc4fffb9e..e228dc5995dfe43d3634127421f4935664a1646b 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u16.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u16.c
@@ -174,8 +174,7 @@ TEST_UNIFORM_Z (mul_3_u16_m_untied, svuint16_t,
 
 /*
 ** mul_m1_u16_m:
-**	mov	(z[0-9]+)\.b, #-1
-**	mul	z0\.h, p0/m, z0\.h, \1\.h
+**	neg	z0\.h, p0/m, z0\.h
 **	ret
 */
 TEST_UNIFORM_Z (mul_m1_u16_m, svuint16_t,
@@ -569,7 +568,7 @@ TEST_UNIFORM_Z (mul_255_u16_x, svuint16_t,
 
 /*
 ** mul_m1_u16_x:
-**	mul	z0\.h, z0\.h, #-1
+**	neg	z0\.h, p0/m, z0\.h
 **	ret
 */
 TEST_UNIFORM_Z (mul_m1_u16_x, svuint16_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u32.c
index a61e85fa12d85e055a15603f5b4e62df48b82634..e8f52c9d7854b31e8c4438f13957ae1c05964552 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u32.c
@@ -174,8 +174,7 @@ TEST_UNIFORM_Z (mul_3_u32_m_untied, svuint32_t,
 
 /*
 ** mul_m1_u32_m:
-**	mov	(z[0-9]+)\.b, #-1
-**	mul	z0\.s, p0/m, z0\.s, \1\.s
+**	neg	z0\.s, p0/m, z0\.s
 **	ret
 */
 TEST_UNIFORM_Z (mul_m1_u32_m, svuint32_t,
@@ -569,7 +568,7 @@ TEST_UNIFORM_Z (mul_255_u32_x, svuint32_t,
 
 /*
 ** mul_m1_u32_x:
-**	mul	z0\.s, z0\.s, #-1
+**	neg	z0\.s, p0/m, z0\.s
 **	ret
 */
 TEST_UNIFORM_Z (mul_m1_u32_x, svuint32_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u64.c
index eee1f8a0c99979a652d215c30cd94cb3c5eb21aa..2ccdc3642c52085f5c0035af1a4c32341c013f50 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u64.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u64.c
@@ -183,14 +183,25 @@ TEST_UNIFORM_Z (mul_3_u64_m_untied, svuint64_t,
 
 /*
 ** mul_m1_u64_m:
-**	mov	(z[0-9]+)\.b, #-1
-**	mul	z0\.d, p0/m, z0\.d, \1\.d
+**	neg	z0\.d, p0/m, z0\.d
 **	ret
 */
 TEST_UNIFORM_Z (mul_m1_u64_m, svuint64_t,
 		z0 = svmul_n_u64_m (p0, z0, -1),
 		z0 = svmul_m (p0, z0, -1))
 
+/*
+** mul_m1r_u64_m:
+**	mov	(z[0-9]+)\.b, #-1
+**	mov	(z[0-9]+\.d), z0\.d
+**	movprfx	z0, \1
+**	neg	z0\.d, p0/m, \2
+**	ret
+*/
+TEST_UNIFORM_Z (mul_m1r_u64_m, svuint64_t,
+		z0 = svmul_u64_m (p0, svdup_u64 (-1), z0),
+		z0 = svmul_m (p0, svdup_u64 (-1), z0))
+
 /*
 ** mul_u64_z_tied1:
 **	movprfx	z0\.d, p0/z, z0\.d
@@ -597,13 +608,22 @@ TEST_UNIFORM_Z (mul_255_u64_x, svuint64_t,
 
 /*
 ** mul_m1_u64_x:
-**	mul	z0\.d, z0\.d, #-1
+**	neg	z0\.d, p0/m, z0\.d
 **	ret
 */
 TEST_UNIFORM_Z (mul_m1_u64_x, svuint64_t,
 		z0 = svmul_n_u64_x (p0, z0, -1),
 		z0 = svmul_x (p0, z0, -1))
 
+/*
+** mul_m1r_u64_x:
+**	neg	z0\.d, p0/m, z0\.d
+**	ret
+*/
+TEST_UNIFORM_Z (mul_m1r_u64_x, svuint64_t,
+		z0 = svmul_u64_x (p0, svdup_u64 (-1), z0),
+		z0 = svmul_x (p0, svdup_u64 (-1), z0))
+
 /*
 ** mul_m127_u64_x:
 **	mul	z0\.d, z0\.d, #-127
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u8.c
index 06ee1b3e7c8c087075a2680b926803016a0159b8..8e53a4821f05d19bf1e950ea40ef8ea0f80ab764 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u8.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u8.c
@@ -174,8 +174,7 @@ TEST_UNIFORM_Z (mul_3_u8_m_untied, svuint8_t,
 
 /*
 ** mul_m1_u8_m:
-**	mov	(z[0-9]+)\.b, #-1
-**	mul	z0\.b, p0/m, z0\.b, \1\.b
+**	neg	z0\.b, p0/m, z0\.b
 **	ret
 */
 TEST_UNIFORM_Z (mul_m1_u8_m, svuint8_t,
@@ -559,7 +558,7 @@ TEST_UNIFORM_Z (mul_128_u8_x, svuint8_t,
 
 /*
 ** mul_255_u8_x:
-**	mul	z0\.b, z0\.b, #-1
+**	neg	z0\.b, p0/m, z0\.b
 **	ret
 */
 TEST_UNIFORM_Z (mul_255_u8_x, svuint8_t,
@@ -568,7 +567,7 @@ TEST_UNIFORM_Z (mul_255_u8_x, svuint8_t,
 
 /*
 ** mul_m1_u8_x:
-**	mul	z0\.b, z0\.b, #-1
+**	neg	z0\.b, p0/m, z0\.b
 **	ret
 */
 TEST_UNIFORM_Z (mul_m1_u8_x, svuint8_t,
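
Not part of the patch: a rough source-level sketch of the effect the updated tests
encode, written with ACLE intrinsics.  The function names below are invented for
illustration; only the intrinsics themselves (svmul_n_u64_x, svneg_s64_x and the
svreinterpret pair) are real.

#include <arm_sve.h>

/* Multiplying an unsigned vector by -1: per the new mul_m1_u64_x test, this is
   now expected to compile to a single predicated NEG instead of a MOV of an
   all-ones vector followed by a MUL.  */
svuint64_t
mul_by_minus_one (svbool_t pg, svuint64_t x)
{
  return svmul_n_u64_x (pg, x, -1);
}

/* Roughly the shape the gimple folder now produces: reinterpret the unsigned
   operand as signed (svneg has no unsigned forms), negate, and reinterpret
   the result back.  */
svuint64_t
mul_by_minus_one_by_hand (svbool_t pg, svuint64_t x)
{
  svint64_t sx = svreinterpret_s64_u64 (x);
  return svreinterpret_u64_s64 (svneg_s64_x (pg, sx));
}

The reinterpret pair mirrors what the new gimple_folder::convert_and_fold path
does internally: svneg is only defined for signed element types, so the fold
view-converts the unsigned operands to the corresponding signed vector type,
redirects the call to svneg, and view-converts the result back.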