diff --git a/gcc/config/arm/arm-mve-builtins.cc b/gcc/config/arm/arm-mve-builtins.cc
index 8570e18fd966e274342d766cafc42a8c3161fb7c..3c3d30bd0de1d272bd7da37bdc48446d4297a40a 100644
--- a/gcc/config/arm/arm-mve-builtins.cc
+++ b/gcc/config/arm/arm-mve-builtins.cc
@@ -2358,7 +2358,37 @@ function_expander::add_input_operand (insn_code icode, rtx x)
       mode = GET_MODE (x);
     }
   else if (VALID_MVE_PRED_MODE (mode))
-    x = gen_lowpart (mode, x);
+    {
+      if (CONST_INT_P (x))
+	{
+	  if (mode == V8BImode || mode == V4BImode)
+	    {
+	      /* In V8BI or V4BI each element has 2 or 4 bits, if those bits
+		 aren't all the same, gen_lowpart might ICE.  Canonicalize all
+		 the 2 or 4 bits to all ones if any of them is non-zero.  V8BI
+		 and V4BI multi-bit masks are interpreted byte-by-byte at
+		 instruction level, but such constants should describe lanes,
+		 rather than bytes.  See the section on MVE intrinsics in the
+		 Arm ACLE specification.  */
+	      unsigned HOST_WIDE_INT xi = UINTVAL (x);
+	      xi |= ((xi & 0x5555) << 1) | ((xi & 0xaaaa) >> 1);
+	      if (mode == V4BImode)
+		xi |= ((xi & 0x3333) << 2) | ((xi & 0xcccc) >> 2);
+	      if (xi != UINTVAL (x))
+		warning_at (location, 0, "constant predicate argument %d"
+			    " (%wx) does not map to %d lane numbers,"
+			    " converted to %wx",
+			    opno, UINTVAL (x) & 0xffff,
+			    mode == V8BImode ? 8 : 4,
+			    xi & 0xffff);
+
+	      x = gen_int_mode (xi, HImode);
+	    }
+	  x = gen_lowpart (mode, x);
+	}
+      else
+	x = force_lowpart_subreg (mode, x, GET_MODE (x));
+    }
 
   m_ops.safe_grow (m_ops.length () + 1, true);
   create_input_operand (&m_ops.last (), x, mode);
diff --git a/gcc/testsuite/gcc.target/arm/mve/pr108443-run.c b/gcc/testsuite/gcc.target/arm/mve/pr108443-run.c
index cb4b45bd30563c536a5cdc08147970e077abbf37..b894f019b8bb795fd1f4a450ec913b18058931f2 100644
--- a/gcc/testsuite/gcc.target/arm/mve/pr108443-run.c
+++ b/gcc/testsuite/gcc.target/arm/mve/pr108443-run.c
@@ -16,7 +16,7 @@ __attribute__ ((noipa)) partial_write (uint32_t *a, uint32x4_t v, unsigned short
 int
 main (void)
 {
-  unsigned short p = 0x00CC;
+  unsigned short p = 0x00FF;
   uint32_t a[] = {0, 0, 0, 0};
   uint32_t b[] = {0, 0, 0, 0};
   uint32x4_t v = vdupq_n_u32 (0xFFFFFFFFU);
diff --git a/gcc/testsuite/gcc.target/arm/mve/pr108443.c b/gcc/testsuite/gcc.target/arm/mve/pr108443.c
index c5fbfa4a1bb72313bd820f5f0859b3f13464e0c3..0c0e2dd6eb8f911f74710e9f0eaa9613e31c69ab 100644
--- a/gcc/testsuite/gcc.target/arm/mve/pr108443.c
+++ b/gcc/testsuite/gcc.target/arm/mve/pr108443.c
@@ -7,8 +7,8 @@
 
 void
 __attribute__ ((noipa)) partial_write_cst (uint32_t *a, uint32x4_t v)
 {
-  vstrwq_p_u32 (a, v, 0x00CC);
+  vstrwq_p_u32 (a, v, 0x00FF);
 }
 
-/* { dg-final { scan-assembler {mov\tr[0-9]+, #204} } } */
+/* { dg-final { scan-assembler {mov\tr[0-9]+, #255} } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/pr114801.c b/gcc/testsuite/gcc.target/arm/mve/pr114801.c
new file mode 100644
index 0000000000000000000000000000000000000000..ab3130fd4ce8d0571427777b8eeec441f90191af
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/pr114801.c
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-options "-O2" } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <arm_mve.h>
+
+/*
+** test_32:
+**...
+**	mov	r[0-9]+, #65295	@ movhi
+**...
+*/
+uint32x4_t test_32() {
+  /* V4BI predicate converted to 0xff0f.  */
+  return vdupq_m_n_u32(vdupq_n_u32(0xffffffff), 0, 0x4f02); /* { dg-warning {constant predicate argument 3 \(0x4f02\) does not map to 4 lane numbers, converted to 0xff0f} } */
+}
+
+/*
+** test_16:
+**...
+**	mov	r[0-9]+, #12339	@ movhi
+**...
+*/
+uint16x8_t test_16() {
+  /* V8BI predicate converted to 0x3033.  */
+  return vdupq_m_n_u16(vdupq_n_u16(0xffff), 0, 0x3021); /* { dg-warning {constant predicate argument 3 \(0x3021\) does not map to 8 lane numbers, converted to 0x3033} } */
+}
+
+/*
+** test_8:
+**...
+**	mov	r[0-9]+, #23055	@ movhi
+**...
+*/
+uint8x16_t test_8() {
+  return vdupq_m_n_u8(vdupq_n_u8(0xff), 0, 0x5a0f);
+}