diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index e47acb67aeea481caaa96a871c9959b079ea0cd3..327688756d1bb31c7517958736a739729cffcc83 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -768,6 +768,27 @@ public:
     if (integer_zerop (op1) || integer_zerop (op2))
       return f.fold_active_lanes_to (build_zero_cst (TREE_TYPE (f.lhs)));
 
+    /* If the divisor is all integer -1, fold to svneg.  */
+    tree pg = gimple_call_arg (f.call, 0);
+    if (!f.type_suffix (0).unsigned_p && integer_minus_onep (op2))
+      {
+	function_instance instance ("svneg", functions::svneg,
+				    shapes::unary, MODE_none,
+				    f.type_suffix_ids, GROUP_none, f.pred);
+	gcall *call = f.redirect_call (instance);
+	unsigned offset_index = 0;
+	if (f.pred == PRED_m)
+	  {
+	    offset_index = 1;
+	    gimple_call_set_arg (call, 0, op1);
+	  }
+	else
+	  gimple_set_num_ops (call, 5);
+	gimple_call_set_arg (call, offset_index, pg);
+	gimple_call_set_arg (call, offset_index + 1, op1);
+	return call;
+      }
+
     /* If the divisor is a uniform power of 2, fold to a shift
        instruction.  */
     tree op2_cst = uniform_integer_cst_p (op2);
@@ -2047,12 +2068,37 @@ public:
     if (integer_zerop (op1) || integer_zerop (op2))
       return f.fold_active_lanes_to (build_zero_cst (TREE_TYPE (f.lhs)));
 
+    /* If one of the operands is all integer -1, fold to svneg.  */
+    tree pg = gimple_call_arg (f.call, 0);
+    tree negated_op = NULL;
+    if (integer_minus_onep (op2))
+      negated_op = op1;
+    else if (integer_minus_onep (op1))
+      negated_op = op2;
+    if (!f.type_suffix (0).unsigned_p && negated_op)
+      {
+	function_instance instance ("svneg", functions::svneg,
+				    shapes::unary, MODE_none,
+				    f.type_suffix_ids, GROUP_none, f.pred);
+	gcall *call = f.redirect_call (instance);
+	unsigned offset_index = 0;
+	if (f.pred == PRED_m)
+	  {
+	    offset_index = 1;
+	    gimple_call_set_arg (call, 0, op1);
+	  }
+	else
+	  gimple_set_num_ops (call, 5);
+	gimple_call_set_arg (call, offset_index, pg);
+	gimple_call_set_arg (call, offset_index + 1, negated_op);
+	return call;
+      }
+
     /* If one of the operands is a uniform power of 2, fold to a left shift
        by immediate.  */
-    tree pg = gimple_call_arg (f.call, 0);
     tree op1_cst = uniform_integer_cst_p (op1);
     tree op2_cst = uniform_integer_cst_p (op2);
-    tree shift_op1, shift_op2;
+    tree shift_op1, shift_op2 = NULL;
     if (op1_cst && integer_pow2p (op1_cst)
 	&& (f.pred != PRED_m
 	    || is_ptrue (pg, f.type_suffix (0).element_bytes)))
@@ -2068,15 +2114,20 @@ public:
     else
       return NULL;
 
-    shift_op2 = wide_int_to_tree (unsigned_type_for (TREE_TYPE (shift_op2)),
-				  tree_log2 (shift_op2));
-    function_instance instance ("svlsl", functions::svlsl,
-				shapes::binary_uint_opt_n, MODE_n,
-				f.type_suffix_ids, GROUP_none, f.pred);
-    gcall *call = f.redirect_call (instance);
-    gimple_call_set_arg (call, 1, shift_op1);
-    gimple_call_set_arg (call, 2, shift_op2);
-    return call;
+    if (shift_op2)
+      {
+	shift_op2 = wide_int_to_tree (unsigned_type_for (TREE_TYPE (shift_op2)),
+				      tree_log2 (shift_op2));
+	function_instance instance ("svlsl", functions::svlsl,
+				    shapes::binary_uint_opt_n, MODE_n,
+				    f.type_suffix_ids, GROUP_none, f.pred);
+	gcall *call = f.redirect_call (instance);
+	gimple_call_set_arg (call, 1, shift_op1);
+	gimple_call_set_arg (call, 2, shift_op2);
+	return call;
+      }
+
+    return NULL;
   }
 };
 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s32.c
index a338b28805e7696cd0b5e9938d8573126f66965a..719adc818524da02d892581f041025b27eeb2dae 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s32.c
@@ -55,6 +55,15 @@ TEST_UNIFORM_ZX (div_w0_s32_m_untied, svint32_t, int32_t,
 		 z0 = svdiv_n_s32_m (p0, z1, x0),
 		 z0 = svdiv_m (p0, z1, x0))
 
+/*
+** div_m1_s32_m_tied1:
+**	neg	z0\.s, p0/m, z0\.s
+**	ret
+*/
+TEST_UNIFORM_Z (div_m1_s32_m_tied1, svint32_t,
+		z0 = svdiv_n_s32_m (p0, z0, -1),
+		z0 = svdiv_m (p0, z0, -1))
+
 /*
 ** div_1_s32_m_tied1:
 **	ret
@@ -63,6 +72,16 @@ TEST_UNIFORM_Z (div_1_s32_m_tied1, svint32_t,
 		z0 = svdiv_n_s32_m (p0, z0, 1),
 		z0 = svdiv_m (p0, z0, 1))
 
+/*
+** div_m1_s32_m_untied:
+**	movprfx	z0, z1
+**	neg	z0\.s, p0/m, z1\.s
+**	ret
+*/
+TEST_UNIFORM_Z (div_m1_s32_m_untied, svint32_t,
+		z0 = svdiv_n_s32_m (p0, z1, -1),
+		z0 = svdiv_m (p0, z1, -1))
+
 /*
 ** div_1_s32_m_untied:
 **	mov	z0\.d, z1\.d
@@ -214,6 +233,17 @@ TEST_UNIFORM_ZX (div_w0_s32_z_untied, svint32_t, int32_t,
 		 z0 = svdiv_n_s32_z (p0, z1, x0),
 		 z0 = svdiv_z (p0, z1, x0))
 
+/*
+** div_m1_s32_z_tied1:
+**	mov	(z[0-9]+)\.d, z0\.d
+**	movprfx	z0\.s, p0/z, \1\.s
+**	neg	z0\.s, p0/m, \1\.s
+**	ret
+*/
+TEST_UNIFORM_Z (div_m1_s32_z_tied1, svint32_t,
+		z0 = svdiv_n_s32_z (p0, z0, -1),
+		z0 = svdiv_z (p0, z0, -1))
+
 /*
 ** div_1_s32_z_tied1:
 **	movi?	[vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
@@ -224,6 +254,16 @@ TEST_UNIFORM_Z (div_1_s32_z_tied1, svint32_t,
 		z0 = svdiv_n_s32_z (p0, z0, 1),
 		z0 = svdiv_z (p0, z0, 1))
 
+/*
+** div_m1_s32_z_untied:
+**	movprfx	z0\.s, p0/z, z1\.s
+**	neg	z0\.s, p0/m, z1\.s
+**	ret
+*/
+TEST_UNIFORM_Z (div_m1_s32_z_untied, svint32_t,
+		z0 = svdiv_n_s32_z (p0, z1, -1),
+		z0 = svdiv_z (p0, z1, -1))
+
 /*
 ** div_1_s32_z_untied:
 **	movi?	[vdz]([0-9]+)\.?(?:[0-9]*[bhsd])?, #?0
@@ -381,6 +421,15 @@ TEST_UNIFORM_ZX (div_w0_s32_x_untied, svint32_t, int32_t,
 		 z0 = svdiv_n_s32_x (p0, z1, x0),
 		 z0 = svdiv_x (p0, z1, x0))
 
+/*
+** div_m1_s32_x_tied1:
+**	neg	z0\.s, p0/m, z0\.s
+**	ret
+*/
+TEST_UNIFORM_Z (div_m1_s32_x_tied1, svint32_t,
+		z0 = svdiv_n_s32_x (p0, z0, -1),
+		z0 = svdiv_x (p0, z0, -1))
+
 /*
 ** div_1_s32_x_tied1:
 **	ret
@@ -389,6 +438,16 @@ TEST_UNIFORM_Z (div_1_s32_x_tied1, svint32_t,
 		z0 = svdiv_n_s32_x (p0, z0, 1),
 		z0 = svdiv_x (p0, z0, 1))
 
+/*
+** div_m1_s32_x_untied:
+**	movprfx	z0, z1
+**	neg	z0\.s, p0/m, z1\.s
+**	ret
+*/
+TEST_UNIFORM_Z (div_m1_s32_x_untied, svint32_t,
+		z0 = svdiv_n_s32_x (p0, z1, -1),
+		z0 = svdiv_x (p0, z1, -1))
+
 /*
 ** div_1_s32_x_untied:
 **	mov	z0\.d, z1\.d
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s16.c
index a2a03aba408f21de60c1a559b5971a2b1e866011..e9b6bf83b032e8751019026faa80e7f1d76ba04a 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s16.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s16.c
@@ -183,8 +183,7 @@ TEST_UNIFORM_Z (mul_3_s16_m_untied, svint16_t,
 
 /*
 ** mul_m1_s16_m:
-**	mov	(z[0-9]+)\.b, #-1
-**	mul	z0\.h, p0/m, z0\.h, \1\.h
+**	neg	z0\.h, p0/m, z0\.h
 **	ret
 */
 TEST_UNIFORM_Z (mul_m1_s16_m, svint16_t,
@@ -597,7 +596,7 @@ TEST_UNIFORM_Z (mul_255_s16_x, svint16_t,
 
 /*
 ** mul_m1_s16_x:
-**	mul	z0\.h, z0\.h, #-1
+**	neg	z0\.h, p0/m, z0\.h
 **	ret
 */
 TEST_UNIFORM_Z (mul_m1_s16_x, svint16_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s32.c
index 372b9f4a0080ed3b9360f7fddc88e29b4b01a027..71c476f48ca36297ce62f59aeb348d1f51b9318b 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s32.c
@@ -183,14 +183,23 @@ TEST_UNIFORM_Z (mul_3_s32_m_untied, svint32_t,
 
 /*
 ** mul_m1_s32_m:
-**	mov	(z[0-9]+)\.b, #-1
-**	mul	z0\.s, p0/m, z0\.s, \1\.s
+**	neg	z0\.s, p0/m, z0\.s
 **	ret
 */
 TEST_UNIFORM_Z (mul_m1_s32_m, svint32_t,
 		z0 = svmul_n_s32_m (p0, z0, -1),
 		z0 = svmul_m (p0, z0, -1))
 
+/*
+** mul_m1r_s32_m:
+**	mov	z0\.b, #-1
+**	neg	z0\.s, p0/m, z1\.s
+**	ret
+*/
+TEST_UNIFORM_Z (mul_m1r_s32_m, svint32_t,
+		z0 = svmul_s32_m (p0, svdup_s32 (-1), z1),
+		z0 = svmul_m (p0, svdup_s32 (-1), z1))
+
 /*
 ** mul_s32_z_tied1:
 **	movprfx	z0\.s, p0/z, z0\.s
@@ -597,13 +606,44 @@ TEST_UNIFORM_Z (mul_255_s32_x, svint32_t,
 
 /*
 ** mul_m1_s32_x:
-**	mul	z0\.s, z0\.s, #-1
+**	neg	z0\.s, p0/m, z0\.s
 **	ret
 */
 TEST_UNIFORM_Z (mul_m1_s32_x, svint32_t,
 		z0 = svmul_n_s32_x (p0, z0, -1),
 		z0 = svmul_x (p0, z0, -1))
 
+/*
+** mul_m1r_s32_x:
+**	movprfx	z0, z1
+**	neg	z0\.s, p0/m, z1\.s
+**	ret
+*/
+TEST_UNIFORM_Z (mul_m1r_s32_x, svint32_t,
+		z0 = svmul_s32_x (p0, svdup_s32 (-1), z1),
+		z0 = svmul_x (p0, svdup_s32 (-1), z1))
+
+/*
+** mul_m1_s32_z:
+**	mov	(z[0-9]+)\.d, z0\.d
+**	movprfx	z0\.s, p0/z, \1\.s
+**	neg	z0\.s, p0/m, \1\.s
+**	ret
+*/
+TEST_UNIFORM_Z (mul_m1_s32_z, svint32_t,
+		z0 = svmul_n_s32_z (p0, z0, -1),
+		z0 = svmul_z (p0, z0, -1))
+
+/*
+** mul_m1r_s32_z:
+**	movprfx	z0\.s, p0/z, z1\.s
+**	neg	z0\.s, p0/m, z1\.s
+**	ret
+*/
+TEST_UNIFORM_Z (mul_m1r_s32_z, svint32_t,
+		z0 = svmul_s32_z (p0, svdup_s32 (-1), z1),
+		z0 = svmul_z (p0, svdup_s32 (-1), z1))
+
 /*
 ** mul_m127_s32_x:
 **	mul	z0\.s, z0\.s, #-127
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s64.c
index c638e254655c9345ae6abbf31fd6ef8024a265e4..a34dc27740a811bc41271ccaa5b2121f108ba2df 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s64.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s64.c
@@ -192,8 +192,7 @@ TEST_UNIFORM_Z (mul_3_s64_m_untied, svint64_t,
 
 /*
 ** mul_m1_s64_m:
-**	mov	(z[0-9]+)\.b, #-1
-**	mul	z0\.d, p0/m, z0\.d, \1\.d
+**	neg	z0\.d, p0/m, z0\.d
 **	ret
 */
 TEST_UNIFORM_Z (mul_m1_s64_m, svint64_t,
@@ -625,7 +624,7 @@ TEST_UNIFORM_Z (mul_255_s64_x, svint64_t,
 
 /*
 ** mul_m1_s64_x:
-**	mul	z0\.d, z0\.d, #-1
+**	neg	z0\.d, p0/m, z0\.d
 **	ret
 */
 TEST_UNIFORM_Z (mul_m1_s64_x, svint64_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s8.c
index 37a490ff6112b98aaa51ffcf134de0d8adb88887..683e15eccecb8a43cc03a2b9a417f0aad7a26f7d 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s8.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s8.c
@@ -183,8 +183,7 @@ TEST_UNIFORM_Z (mul_3_s8_m_untied, svint8_t,
 
 /*
 ** mul_m1_s8_m:
-**	mov	(z[0-9]+)\.b, #-1
-**	mul	z0\.b, p0/m, z0\.b, \1\.b
+**	neg	z0\.b, p0/m, z0\.b
 **	ret
 */
 TEST_UNIFORM_Z (mul_m1_s8_m, svint8_t,
@@ -587,7 +586,7 @@ TEST_UNIFORM_Z (mul_128_s8_x, svint8_t,
 
 /*
 ** mul_255_s8_x:
-**	mul	z0\.b, z0\.b, #-1
+**	neg	z0\.b, p0/m, z0\.b
 **	ret
 */
 TEST_UNIFORM_Z (mul_255_s8_x, svint8_t,
@@ -596,7 +595,7 @@ TEST_UNIFORM_Z (mul_255_s8_x, svint8_t,
 
 /*
 ** mul_m1_s8_x:
-**	mul	z0\.b, z0\.b, #-1
+**	neg	z0\.b, p0/m, z0\.b
 **	ret
 */
 TEST_UNIFORM_Z (mul_m1_s8_x, svint8_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/div_const_run.c b/gcc/testsuite/gcc.target/aarch64/sve/div_const_run.c
index c96bb2763dcef9b4188b2a076d431da8726b735d..60cf8345d6a752da1d60a71def01419a21e838f6 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/div_const_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/div_const_run.c
@@ -42,7 +42,9 @@ typedef svuint64_t svuint64_ __attribute__((arm_sve_vector_bits(128)));
 TEST_TYPES_1 (uint64, u64)
 
 #define TEST_VALUES_S_1(B, OP1, OP2) \
-  F (int##B, s##B, x, OP1, OP2)
+  F (int##B, s##B, x, OP1, OP2) \
+  F (int##B, s##B, z, OP1, OP2) \
+  F (int##B, s##B, m, OP1, OP2)
 
 #define TEST_VALUES_S \
   TEST_VALUES_S_1 (32, INT32_MIN, INT32_MIN) \
@@ -60,7 +62,11 @@ typedef svuint64_t svuint64_ __attribute__((arm_sve_vector_bits(128)));
   TEST_VALUES_S_1 (32, INT32_MAX, -5) \
   TEST_VALUES_S_1 (64, INT64_MAX, -5) \
   TEST_VALUES_S_1 (32, INT32_MIN, -4) \
-  TEST_VALUES_S_1 (64, INT64_MIN, -4)
+  TEST_VALUES_S_1 (64, INT64_MIN, -4) \
+  TEST_VALUES_S_1 (32, INT32_MAX, -1) \
+  TEST_VALUES_S_1 (32, -7, -1) \
+  TEST_VALUES_S_1 (64, INT64_MIN, -1) \
+  TEST_VALUES_S_1 (64, 16, -1)
 
 #define TEST_VALUES_U_1(B, OP1, OP2) \
   F (uint##B, u##B, x, OP1, OP2)
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mul_const_run.c b/gcc/testsuite/gcc.target/aarch64/sve/mul_const_run.c
index c369d5be167252cc803cf4516df7d7f72fabbb90..eb897d622fcba7a1705abe2033839ae2e745a59c 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/mul_const_run.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/mul_const_run.c
@@ -44,7 +44,9 @@ typedef svuint64_t svuint64_ __attribute__((arm_sve_vector_bits(128)));
 TEST_TYPES_1 (uint64, u64)
 
 #define TEST_VALUES_S_1(B, OP1, OP2) \
-  F (int##B, s##B, x, OP1, OP2)
+  F (int##B, s##B, x, OP1, OP2) \
+  F (int##B, s##B, m, OP1, OP2) \
+  F (int##B, s##B, z, OP1, OP2)
 
 #define TEST_VALUES_S \
   TEST_VALUES_S_1 (32, INT32_MIN, INT32_MIN) \
@@ -70,7 +72,11 @@ typedef svuint64_t svuint64_ __attribute__((arm_sve_vector_bits(128)));
   TEST_VALUES_S_1 (32, INT32_MAX, -5) \
   TEST_VALUES_S_1 (64, INT64_MAX, -5) \
   TEST_VALUES_S_1 (32, INT32_MIN, -4) \
-  TEST_VALUES_S_1 (64, INT64_MIN, -4)
+  TEST_VALUES_S_1 (64, INT64_MIN, -4) \
+  TEST_VALUES_S_1 (32, INT32_MAX, -1) \
+  TEST_VALUES_S_1 (32, -7, -1) \
+  TEST_VALUES_S_1 (64, INT64_MIN, -1) \
+  TEST_VALUES_S_1 (64, 16, -1)
 
 #define TEST_VALUES_U_1(B, OP1, OP2) \
   F (uint##B, u##B, x, OP1, OP2)
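For reference, a minimal standalone sketch (not part of the patch) of what the new gimple folds do at the source level, assuming an SVE-enabled toolchain and arm_sve.h; the function names below are illustrative only.  Because the fold is gated on signed element types (the !f.type_suffix (0).unsigned_p check) and rewrites the svdiv/svmul call into the equivalent svneg, both functions should now compile to a single predicated neg:

#include <arm_sve.h>

/* Divide every active lane by -1; with this patch the gimple folder
   rewrites the call to svneg before expansion.  */
svint32_t
div_by_m1 (svbool_t pg, svint32_t x)
{
  return svdiv_n_s32_x (pg, x, -1);
}

/* The predicated negation it becomes.  The equivalence holds for all
   inputs, including INT32_MIN: both INT32_MIN / -1 and -INT32_MIN
   wrap back to INT32_MIN on AArch64, which is why the runtime tests
   above add INT64_MIN / -1 and INT32_MAX / -1 cases.  */
svint32_t
neg_equiv (svbool_t pg, svint32_t x)
{
  return svneg_s32_x (pg, x);
}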