diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index 31df3a613dd3fd361d96229d324aaab57f2a4bf4..ea79e0bdda270db93c426bb0ae64746d30a1e520 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -855,8 +855,8 @@ BDESC (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv1di3, "__ BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF) BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF) -BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF) -BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF) +BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_vec_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF) +BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_vec_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF) BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF) BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF) BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF) @@ -996,8 +996,8 @@ BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128 /* AVX */ BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF) BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, 
(int) V8SF_FTYPE_V8SF_V8SF) -BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF) -BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF) +BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_vec_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF) +BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_vec_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF) BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF) BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF) BDESC (OPTION_MASK_ISA_AVX, 0, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 5bd65dd931217d27fbb0ee881c4f6e9e1f0bfa91..1f1db8214ccfc621ea87edab978c269899cc99af 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -2410,7 +2410,7 @@ (set_attr "prefix" "<round_saeonly_scalar_prefix>") (set_attr "mode" "<ssescalarmode>")]) -(define_insn "avx_addsubv4df3" +(define_insn "vec_addsubv4df3" [(set (match_operand:V4DF 0 "register_operand" "=x") (vec_merge:V4DF (minus:V4DF @@ -2424,7 +2424,7 @@ (set_attr "prefix" "vex") (set_attr "mode" "V4DF")]) -(define_insn "sse3_addsubv2df3" +(define_insn "vec_addsubv2df3" [(set (match_operand:V2DF 0 "register_operand" "=x,x") (vec_merge:V2DF (minus:V2DF @@ -2442,7 +2442,7 @@ (set_attr "prefix" "orig,vex") (set_attr "mode" "V2DF")]) -(define_insn "avx_addsubv8sf3" +(define_insn "vec_addsubv8sf3" [(set (match_operand:V8SF 0 "register_operand" "=x") (vec_merge:V8SF (minus:V8SF @@ -2456,7 +2456,7 @@ 
(set_attr "prefix" "vex") (set_attr "mode" "V8SF")]) -(define_insn "sse3_addsubv4sf3" +(define_insn "vec_addsubv4sf3" [(set (match_operand:V4SF 0 "register_operand" "=x,x") (vec_merge:V4SF (minus:V4SF diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index 00caf3844ccf8ea289d581839766502d51b9e8d7..1b91814433057b1b377283fd1f40cb970dc3d243 100644 --- a/gcc/doc/md.texi +++ b/gcc/doc/md.texi @@ -5682,6 +5682,14 @@ signed/unsigned elements of size S@. Subtract the high/low elements of 2 from 1 and widen the resulting elements. Put the N/2 results of size 2*S in the output vector (operand 0). +@cindex @code{vec_addsub@var{m}3} instruction pattern +@item @samp{vec_addsub@var{m}3} +Alternating subtract, add with even lanes doing subtract and odd +lanes doing addition. Operands 1 and 2 and the output operand are vectors +with mode @var{m}. + +These instructions are not allowed to @code{FAIL}. + @cindex @code{mulhisi3} instruction pattern @item @samp{mulhisi3} Multiply operands 1 and 2, which have mode @code{HImode}, and store diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index b2f414d2131b867eda337cd30f5ed40ed7c9fa10..c3b8e730960c5f95b90a4dad4a451bd0240257bd 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -281,6 +281,7 @@ DEF_INTERNAL_OPTAB_FN (COMPLEX_ADD_ROT90, ECF_CONST, cadd90, binary) DEF_INTERNAL_OPTAB_FN (COMPLEX_ADD_ROT270, ECF_CONST, cadd270, binary) DEF_INTERNAL_OPTAB_FN (COMPLEX_MUL, ECF_CONST, cmul, binary) DEF_INTERNAL_OPTAB_FN (COMPLEX_MUL_CONJ, ECF_CONST, cmul_conj, binary) +DEF_INTERNAL_OPTAB_FN (VEC_ADDSUB, ECF_CONST, vec_addsub, binary) /* FP scales. 
*/ diff --git a/gcc/optabs.def b/gcc/optabs.def index b192a9d070b8aa72e5676b2eaa020b5bdd7ffcc8..41ab2598eb6c32c003cbed490796abf25d2ee315 100644 --- a/gcc/optabs.def +++ b/gcc/optabs.def @@ -407,6 +407,7 @@ OPTAB_D (vec_widen_usubl_hi_optab, "vec_widen_usubl_hi_$a") OPTAB_D (vec_widen_usubl_lo_optab, "vec_widen_usubl_lo_$a") OPTAB_D (vec_widen_uaddl_hi_optab, "vec_widen_uaddl_hi_$a") OPTAB_D (vec_widen_uaddl_lo_optab, "vec_widen_uaddl_lo_$a") +OPTAB_D (vec_addsub_optab, "vec_addsub$a3") OPTAB_D (sync_add_optab, "sync_add$I$a") OPTAB_D (sync_and_optab, "sync_and$I$a") diff --git a/gcc/testsuite/gcc.target/i386/vect-addsub-2.c b/gcc/testsuite/gcc.target/i386/vect-addsub-2.c new file mode 100644 index 0000000000000000000000000000000000000000..a6b941461e8e9fd8a9cad899d87c9e919d19529a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-addsub-2.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target sse3 } */ +/* { dg-options "-O3 -msse3" } */ + +float a[1024], b[1024]; + +void foo() /* Lanes alternate -, +, -, + within each group of four. */ +{ + for (int i = 0; i < 256; i++) + { + a[4*i+0] = a[4*i+0] - b[4*i+0]; + a[4*i+1] = a[4*i+1] + b[4*i+1]; + a[4*i+2] = a[4*i+2] - b[4*i+2]; + a[4*i+3] = a[4*i+3] + b[4*i+3]; + } +} + +/* We should be able to vectorize this with SLP using the addsub + SLP pattern. 
*/ +/* { dg-final { scan-assembler "addsubps" } } */ +/* { dg-final { scan-assembler-not "shuf" } } */ diff --git a/gcc/testsuite/gcc.target/i386/vect-addsub-3.c b/gcc/testsuite/gcc.target/i386/vect-addsub-3.c new file mode 100644 index 0000000000000000000000000000000000000000..b27ee56bd73ee1c30ad663a960e544741b17a71b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-addsub-3.c @@ -0,0 +1,38 @@ +/* { dg-do run } */ +/* { dg-require-effective-target sse3 } */ +/* { dg-options "-O3 -msse3" } */ + +#ifndef CHECK_H +#define CHECK_H "sse3-check.h" +#endif + +#ifndef TEST +#define TEST sse3_test +#endif + +#include CHECK_H + +double a[2], b[2], c[2]; + +void __attribute__((noipa)) +foo () +{ + /* When we want to use addsubpd we have to keep permuting both + loads, if instead we blend the result of an add and a sub we + can combine the blend with the permute. Both are similar in cost, + verify we did not wrongly apply both. */ + double tem0 = a[1] - b[1]; + double tem1 = a[0] + b[0]; + c[0] = tem0; + c[1] = tem1; +} + +static void +TEST (void) +{ + a[0] = 1.; a[1] = 2.; + b[0] = 2.; b[1] = 4.; + foo (); + if (c[0] != -2. || c[1] != 3.) 
/* Expect 2 - 4 and 1 + 2. */
+ __builtin_abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/vect-addsubv2df.c b/gcc/testsuite/gcc.target/i386/vect-addsubv2df.c new file mode 100644 index 0000000000000000000000000000000000000000..547485d5519df8f53f9dd30e3b9e0bf17d6d740c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-addsubv2df.c @@ -0,0 +1,42 @@ +/* { dg-do run } */ +/* { dg-require-effective-target sse3 } */ +/* { dg-options "-O3 -msse3 -fdump-tree-slp2" } */ + +#ifndef CHECK_H +#define CHECK_H "sse3-check.h" +#endif + +#ifndef TEST +#define TEST sse3_test +#endif + +#include CHECK_H + +double x[2], y[2], z[2]; +void __attribute__((noipa)) foo () /* Subtract in even lanes, add in odd lanes. */ +{ + x[0] = y[0] - z[0]; + x[1] = y[1] + z[1]; +} +void __attribute__((noipa)) bar () /* Add in even lanes, subtract in odd lanes. */ +{ + x[0] = y[0] + z[0]; + x[1] = y[1] - z[1]; +} +static void +TEST (void) +{ + for (int i = 0; i < 2; ++i) + { + y[i] = i + 1; + z[i] = 2 * i + 1; + } + foo (); + if (x[0] != 0 || x[1] != 5) + __builtin_abort (); + bar (); + if (x[0] != 2 || x[1] != -1) + __builtin_abort (); +} + +/* { dg-final { scan-tree-dump-times "ADDSUB" 1 "slp2" } } */ diff --git a/gcc/testsuite/gcc.target/i386/vect-addsubv4df.c b/gcc/testsuite/gcc.target/i386/vect-addsubv4df.c new file mode 100644 index 0000000000000000000000000000000000000000..e0a1b3d9d00f579efe0c45e399ba736fb46466e3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-addsubv4df.c @@ -0,0 +1,36 @@ +/* { dg-do run { target avx_runtime } } */ +/* { dg-require-effective-target avx } */ +/* { dg-options "-O3 -mavx -fdump-tree-slp2" } */ + +double x[4], y[4], z[4]; +void __attribute__((noipa)) foo () /* Subtract in even lanes, add in odd lanes. */ +{ + x[0] = y[0] - z[0]; + x[1] = y[1] + z[1]; + x[2] = y[2] - z[2]; + x[3] = y[3] + z[3]; +} +void __attribute__((noipa)) bar () /* Add in even lanes, subtract in odd lanes. 
*/ +{ + x[0] = y[0] + z[0]; + x[1] = y[1] - z[1]; + x[2] = y[2] + z[2]; + x[3] = y[3] - z[3]; +} +int main() +{ + for (int i = 0; i < 4; ++i) + { + y[i] = i + 1; + z[i] = 2 * i + 1; + } + foo (); + if (x[0] != 0 || x[1] != 5 || x[2] != -2 || x[3] != 11) + __builtin_abort (); + bar (); + if 
(x[0] != 2 || x[1] != -1 || x[2] != 8 || x[3] != -3) + __builtin_abort (); + return 0; +} + +/* { dg-final { scan-tree-dump-times "ADDSUB" 1 "slp2" } } */ diff --git a/gcc/testsuite/gcc.target/i386/vect-addsubv4sf.c b/gcc/testsuite/gcc.target/i386/vect-addsubv4sf.c new file mode 100644 index 0000000000000000000000000000000000000000..b524f0c35a809cfdb654d66edf3ee95aa8ccd8d7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-addsubv4sf.c @@ -0,0 +1,46 @@ +/* { dg-do run } */ +/* { dg-require-effective-target sse3 } */ +/* { dg-options "-O3 -msse3 -fdump-tree-slp2" } */ + +#ifndef CHECK_H +#define CHECK_H "sse3-check.h" +#endif + +#ifndef TEST +#define TEST sse3_test +#endif + +#include CHECK_H + +float x[4], y[4], z[4]; +void __attribute__((noipa)) foo () /* Subtract in even lanes, add in odd lanes. */ +{ + x[0] = y[0] - z[0]; + x[1] = y[1] + z[1]; + x[2] = y[2] - z[2]; + x[3] = y[3] + z[3]; +} +void __attribute__((noipa)) bar () /* Add in even lanes, subtract in odd lanes. */ +{ + x[0] = y[0] + z[0]; + x[1] = y[1] - z[1]; + x[2] = y[2] + z[2]; + x[3] = y[3] - z[3]; +} +static void +TEST (void) +{ + for (int i = 0; i < 4; ++i) + { + y[i] = i + 1; + z[i] = 2 * i + 1; + } + foo (); + if (x[0] != 0 || x[1] != 5 || x[2] != -2 || x[3] != 11) + __builtin_abort (); + bar (); + if (x[0] != 2 || x[1] != -1 || x[2] != 8 || x[3] != -3) + __builtin_abort (); +} + +/* { dg-final { scan-tree-dump-times "ADDSUB" 1 "slp2" } } */ diff --git a/gcc/testsuite/gcc.target/i386/vect-addsubv8sf.c b/gcc/testsuite/gcc.target/i386/vect-addsubv8sf.c new file mode 100644 index 0000000000000000000000000000000000000000..0eed33b65319ddb7404061ee37f0afdccb3da91a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vect-addsubv8sf.c @@ -0,0 +1,46 @@ +/* { dg-do run { target avx_runtime } } */ +/* { dg-require-effective-target avx } */ +/* { dg-options "-O3 -mavx -fdump-tree-slp2" } */ + +float x[8], y[8], z[8]; +void __attribute__((noipa)) foo () /* Subtract in even lanes, add in odd lanes. 
*/ +{ + x[0] = y[0] - z[0]; + x[1] = y[1] + z[1]; + x[2] = y[2] - z[2]; + x[3] = y[3] + z[3]; + x[4] = y[4] - z[4]; + x[5] = y[5] + z[5]; + x[6] = y[6] 
- z[6]; + x[7] = y[7] + z[7]; +} +void __attribute__((noipa)) bar () /* Add in even lanes, subtract in odd lanes. */ +{ + x[0] = y[0] + z[0]; + x[1] = y[1] - z[1]; + x[2] = y[2] + z[2]; + x[3] = y[3] - z[3]; + x[4] = y[4] + z[4]; + x[5] = y[5] - z[5]; + x[6] = y[6] + z[6]; + x[7] = y[7] - z[7]; +} +int main() +{ + for (int i = 0; i < 8; ++i) + { + y[i] = i + 1; + z[i] = 2 * i + 1; + } + foo (); + if (x[0] != 0 || x[1] != 5 || x[2] != -2 || x[3] != 11 + || x[4] != -4 || x[5] != 17 || x[6] != -6 || x[7] != 23) + __builtin_abort (); + bar (); + if (x[0] != 2 || x[1] != -1 || x[2] != 8 || x[3] != -3 + || x[4] != 14 || x[5] != -5 || x[6] != 20 || x[7] != -7) + __builtin_abort (); + return 0; +} + +/* { dg-final { scan-tree-dump-times "ADDSUB" 1 "slp2" } } */ diff --git a/gcc/tree-vect-slp-patterns.c b/gcc/tree-vect-slp-patterns.c index 2ed49cd9edcabd7948b365dd60d7405b79079a7b..d536494a1bd780b41b90f2d2e5b109144c96b1a0 100644 --- a/gcc/tree-vect-slp-patterns.c +++ b/gcc/tree-vect-slp-patterns.c @@ -1490,6 +1490,105 @@ complex_operations_pattern::build (vec_info * /* vinfo */) gcc_unreachable (); } + +/* The addsub_pattern: a blend of a minus and a plus becomes IFN_VEC_ADDSUB. */ + +class addsub_pattern : public vect_pattern +{ + public: + addsub_pattern (slp_tree *node) + : vect_pattern (node, NULL, IFN_VEC_ADDSUB) {}; + + void build (vec_info *); + + static vect_pattern* + recognize (slp_tree_to_load_perm_map_t *, slp_tree *); +}; + +vect_pattern * +addsub_pattern::recognize (slp_tree_to_load_perm_map_t *, slp_tree *node_) +{ + slp_tree node = *node_; + if (SLP_TREE_CODE (node) != VEC_PERM_EXPR + || SLP_TREE_CHILDREN (node).length () != 2) + return NULL; + + /* Match a blend of a plus and a minus op with the same number of plus and + minus lanes on the same operands. 
*/ + slp_tree sub = SLP_TREE_CHILDREN (node)[0]; + slp_tree add = SLP_TREE_CHILDREN (node)[1]; + bool swapped_p = false; /* True if child 0 is the plus. */ + if (vect_match_expression_p (sub, PLUS_EXPR)) + { + std::swap (add, sub); + swapped_p = true; + } + if (!(vect_match_expression_p (add, PLUS_EXPR) + && vect_match_expression_p (sub, MINUS_EXPR))) + return NULL; + if (!((SLP_TREE_CHILDREN (sub)[0] == SLP_TREE_CHILDREN (add)[0] + && SLP_TREE_CHILDREN (sub)[1] == SLP_TREE_CHILDREN (add)[1]) + || (SLP_TREE_CHILDREN (sub)[0] == SLP_TREE_CHILDREN (add)[1] + && SLP_TREE_CHILDREN (sub)[1] == SLP_TREE_CHILDREN (add)[0]))) + return NULL; + + for (unsigned i = 0; i < SLP_TREE_LANE_PERMUTATION (node).length (); ++i) + { + std::pair<unsigned, unsigned> perm = SLP_TREE_LANE_PERMUTATION (node)[i]; + if (swapped_p) + perm.first = perm.first == 0 ? 1 : 0; /* 0 now selects SUB. */ + /* It has to be alternating -, +, -, ... + While we could permute the .ADDSUB inputs and the .ADDSUB output + that's only profitable over the add + sub + blend if at least + one of the permute is optimized which we can't determine here. */ + if (perm.first != (i & 1) + || perm.second != i) + return NULL; + } + + if (!vect_pattern_validate_optab (IFN_VEC_ADDSUB, node)) + return NULL; + + return new addsub_pattern (node_); +} + +void +addsub_pattern::build (vec_info *vinfo) +{ + slp_tree node = *m_node; + + slp_tree sub = SLP_TREE_CHILDREN (node)[0]; + slp_tree add = SLP_TREE_CHILDREN (node)[1]; + if (vect_match_expression_p (sub, PLUS_EXPR)) + std::swap (add, sub); + + /* Modify the blend node in-place to use the operands of SUB directly. */ + SLP_TREE_CHILDREN (node)[0] = SLP_TREE_CHILDREN (sub)[0]; + SLP_TREE_CHILDREN (node)[1] = SLP_TREE_CHILDREN (sub)[1]; + SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[0])++; + SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[1])++; + + /* Build IFN_VEC_ADDSUB from the sub representative operands. 
*/ + stmt_vec_info rep = SLP_TREE_REPRESENTATIVE (sub); + gcall *call = gimple_build_call_internal (IFN_VEC_ADDSUB, 2, + gimple_assign_rhs1 (rep->stmt), + gimple_assign_rhs2 (rep->stmt)); + gimple_call_set_lhs (call, make_ssa_name + (TREE_TYPE (gimple_assign_lhs (rep->stmt)))); + gimple_call_set_nothrow (call, true); + gimple_set_bb (call, gimple_bb (rep->stmt)); + SLP_TREE_REPRESENTATIVE (node) = vinfo->add_pattern_stmt (call, rep); + STMT_VINFO_RELEVANT (SLP_TREE_REPRESENTATIVE (node)) = vect_used_in_scope; + STMT_SLP_TYPE (SLP_TREE_REPRESENTATIVE (node)) = pure_slp; + STMT_VINFO_VECTYPE (SLP_TREE_REPRESENTATIVE (node)) = SLP_TREE_VECTYPE (node); + STMT_VINFO_SLP_VECT_ONLY_PATTERN (SLP_TREE_REPRESENTATIVE (node)) = true; + SLP_TREE_CODE (node) = ERROR_MARK; /* No longer a VEC_PERM_EXPR. */ + SLP_TREE_LANE_PERMUTATION (node).release (); + + vect_free_slp_tree (sub); /* Former blend inputs, no longer children. */ + vect_free_slp_tree (add); +} + /******************************************************************************* * Pattern matching definitions ******************************************************************************/ @@ -1502,6 +1601,7 @@ vect_pattern_decl_t slp_patterns[] overlap in what they can detect. 
*/ SLP_PATTERN (complex_operations_pattern), + SLP_PATTERN (addsub_pattern) }; #undef SLP_PATTERN diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 69ee8faed09499ea986e38a309b77ecea899feab..227d6aa3ee870d8da8859a2a7a5d2b7a506d6b24 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -3705,6 +3705,7 @@ vect_optimize_slp (vec_info *vinfo) case CFN_COMPLEX_ADD_ROT270: case CFN_COMPLEX_MUL: case CFN_COMPLEX_MUL_CONJ: + case CFN_VEC_ADDSUB: continue; default:; } diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 5c71fbc487f76b7474e1cffc6089519e1336d7b0..fa28336d42950051b5e51d4fc1beebbc04297ff2 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -2100,7 +2100,8 @@ class vect_pattern this->m_ifn = ifn; this->m_node = node; this->m_ops.create (0); - this->m_ops.safe_splice (*m_ops); + if (m_ops) /* May be NULL; addsub_pattern appears to pass NULL. */ + this->m_ops.safe_splice (*m_ops); } public: