diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a41b8e0841cd92902e1b850cc803303e2421785c..f557c9ff453748495447d46a12370a3538c38bff 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,23 @@ +2016-04-28 Claudiu Zissulescu <claziss@synopsys.com> + + * config/arc/arc.c (arc_vector_mode_supported_p): Add support for + the new ARC HS SIMD instructions. + (arc_preferred_simd_mode): New function. + (arc_autovectorize_vector_sizes): Likewise. + (TARGET_VECTORIZE_PREFERRED_SIMD_MODE) + (TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES): Define. + (arc_init_reg_tables): Accept new ARC HS SIMD modes. + (arc_init_builtins): Add new SIMD builtin types. + (arc_split_move): Handle 64 bit vector moves. + * config/arc/arc.h (TARGET_PLUS_DMPY, TARGET_PLUS_MACD) + (TARGET_PLUS_QMACW): Define. + * config/arc/builtins.def (QMACH, QMACHU, QMPYH, QMPYHU, DMACH) + (DMACHU, DMPYH, DMPYHU, DMACWH, DMACWHU, VMAC2H, VMAC2HU, VMPY2H) + (VMPY2HU, VADDSUB2H, VSUBADD2H, VADDSUB, VSUBADD, VADDSUB4H) + (VSUBADD4H): New builtins. + * config/arc/simdext.md: Add new ARC HS SIMD instructions. + * testsuite/gcc.target/arc/builtin_simdarc.c: New file. + 2016-04-28 Eduard Sanou <dhole@openmailbox.org> Matthias Klose <doko@debian.org> diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c index d60db502ef85eb07cca0d582a21265584e51b714..d120946a5f2fa78ecf8df464d108ffa8adb0a536 100644 --- a/gcc/config/arc/arc.c +++ b/gcc/config/arc/arc.c @@ -247,16 +247,47 @@ static bool arc_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT, static bool arc_vector_mode_supported_p (machine_mode mode) { - if (!TARGET_SIMD_SET) - return false; + switch (mode) + { + case V2HImode: + return TARGET_PLUS_DMPY; + case V4HImode: + case V2SImode: + return TARGET_PLUS_QMACW; + case V4SImode: + case V8HImode: + return TARGET_SIMD_SET; - if ((mode == V4SImode) - || (mode == V8HImode)) - return true; + default: + return false; + } +} - return false; +/* Implements target hook TARGET_VECTORIZE_PREFERRED_SIMD_MODE. 
*/ + +static enum machine_mode +arc_preferred_simd_mode (enum machine_mode mode) +{ + switch (mode) + { + case HImode: + return TARGET_PLUS_QMACW ? V4HImode : V2HImode; + case SImode: + return V2SImode; + + default: + return word_mode; + } } +/* Implements target hook + TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES. */ + +static unsigned int +arc_autovectorize_vector_sizes (void) +{ + return TARGET_PLUS_QMACW ? (8 | 4) : 0; +} /* TARGET_PRESERVE_RELOAD_P is still awaiting patch re-evaluation / review. */ static bool arc_preserve_reload_p (rtx in) ATTRIBUTE_UNUSED; @@ -345,6 +376,12 @@ static void arc_finalize_pic (void); #undef TARGET_VECTOR_MODE_SUPPORTED_P #define TARGET_VECTOR_MODE_SUPPORTED_P arc_vector_mode_supported_p +#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE +#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arc_preferred_simd_mode + +#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES +#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES arc_autovectorize_vector_sizes + #undef TARGET_CAN_USE_DOLOOP_P #define TARGET_CAN_USE_DOLOOP_P arc_can_use_doloop_p @@ -1214,7 +1251,12 @@ arc_init_reg_tables (void) arc_mode_class[i] = 0; break; case MODE_VECTOR_INT: - arc_mode_class [i] = (1<< (int) V_MODE); + if (GET_MODE_SIZE (m) == 4) + arc_mode_class[i] = (1 << (int) S_MODE); + else if (GET_MODE_SIZE (m) == 8) + arc_mode_class[i] = (1 << (int) D_MODE); + else + arc_mode_class[i] = (1 << (int) V_MODE); break; case MODE_CC: default: @@ -5277,6 +5319,15 @@ arc_builtin_decl (unsigned id, bool initialize_p ATTRIBUTE_UNUSED) static void arc_init_builtins (void) { + tree V4HI_type_node; + tree V2SI_type_node; + tree V2HI_type_node; + + /* Vector types based on HS SIMD elements. 
*/ + V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode); + V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode); + V2HI_type_node = build_vector_type_for_mode (intHI_type_node, V2HImode); + tree pcvoid_type_node = build_pointer_type (build_qualified_type (void_type_node, TYPE_QUAL_CONST)); @@ -5341,6 +5392,28 @@ arc_init_builtins (void) tree v8hi_ftype_v8hi = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE); + /* ARCv2 SIMD types. */ + tree long_ftype_v4hi_v4hi + = build_function_type_list (long_long_integer_type_node, + V4HI_type_node, V4HI_type_node, NULL_TREE); + tree int_ftype_v2hi_v2hi + = build_function_type_list (integer_type_node, + V2HI_type_node, V2HI_type_node, NULL_TREE); + tree v2si_ftype_v2hi_v2hi + = build_function_type_list (V2SI_type_node, + V2HI_type_node, V2HI_type_node, NULL_TREE); + tree v2hi_ftype_v2hi_v2hi + = build_function_type_list (V2HI_type_node, + V2HI_type_node, V2HI_type_node, NULL_TREE); + tree v2si_ftype_v2si_v2si + = build_function_type_list (V2SI_type_node, + V2SI_type_node, V2SI_type_node, NULL_TREE); + tree v4hi_ftype_v4hi_v4hi + = build_function_type_list (V4HI_type_node, + V4HI_type_node, V4HI_type_node, NULL_TREE); + tree long_ftype_v2si_v2hi + = build_function_type_list (long_long_integer_type_node, + V2SI_type_node, V2HI_type_node, NULL_TREE); /* Add the builtins. 
*/ #define DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, MASK) \ @@ -8706,6 +8779,31 @@ arc_split_move (rtx *operands) return; } + if (TARGET_PLUS_QMACW + && GET_CODE (operands[1]) == CONST_VECTOR) + { + HOST_WIDE_INT intval0, intval1; + if (GET_MODE (operands[1]) == V2SImode) + { + intval0 = INTVAL (XVECEXP (operands[1], 0, 0)); + intval1 = INTVAL (XVECEXP (operands[1], 0, 1)); + } + else + { + intval1 = INTVAL (XVECEXP (operands[1], 0, 3)) << 16; + intval1 |= INTVAL (XVECEXP (operands[1], 0, 2)) & 0xFFFF; + intval0 = INTVAL (XVECEXP (operands[1], 0, 1)) << 16; + intval0 |= INTVAL (XVECEXP (operands[1], 0, 0)) & 0xFFFF; + } + xop[0] = gen_rtx_REG (SImode, REGNO (operands[0])); + xop[3] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1); + xop[2] = GEN_INT (trunc_int_for_mode (intval0, SImode)); + xop[1] = GEN_INT (trunc_int_for_mode (intval1, SImode)); + emit_move_insn (xop[0], xop[2]); + emit_move_insn (xop[3], xop[1]); + return; + } + for (i = 0; i < 2; i++) { if (MEM_P (operands[i]) && auto_inc_p (XEXP (operands[i], 0))) diff --git a/gcc/config/arc/arc.h b/gcc/config/arc/arc.h index 1c2a38d4acfc10ffde19cd4f8a1d08fd802234bf..5100a5b8f821f3e5a6dfa5f986790ddc3ac60674 100644 --- a/gcc/config/arc/arc.h +++ b/gcc/config/arc/arc.h @@ -1724,6 +1724,12 @@ enum /* Any multiplication feature macro. */ #define TARGET_ANY_MPY \ (TARGET_MPY || TARGET_MUL64_SET || TARGET_MULMAC_32BY16_SET) +/* PLUS_DMPY feature macro. */ +#define TARGET_PLUS_DMPY ((arc_mpy_option > 6) && TARGET_HS) +/* PLUS_MACD feature macro. */ +#define TARGET_PLUS_MACD ((arc_mpy_option > 7) && TARGET_HS) +/* PLUS_QMACW feature macro. */ +#define TARGET_PLUS_QMACW ((arc_mpy_option > 8) && TARGET_HS) /* ARC600 and ARC601 feature macro. 
*/ #define TARGET_ARC600_FAMILY (TARGET_ARC600 || TARGET_ARC601) diff --git a/gcc/config/arc/builtins.def b/gcc/config/arc/builtins.def index 19be1d218520b19993f07bbf08077c5c9412f381..8c71d30a459a74fe5e7d2c4c98a4f6a942616997 100644 --- a/gcc/config/arc/builtins.def +++ b/gcc/config/arc/builtins.def @@ -193,3 +193,30 @@ DEF_BUILTIN (VINTI, 1, void_ftype_int, vinti_insn, TARGET_SIMD_SET) /* END SIMD marker. */ DEF_BUILTIN (SIMD_END, 0, void_ftype_void, nothing, 0) + +/* ARCv2 SIMD instructions that use/clobber the accumulator reg. */ +DEF_BUILTIN (QMACH, 2, long_ftype_v4hi_v4hi, qmach, TARGET_PLUS_QMACW) +DEF_BUILTIN (QMACHU, 2, long_ftype_v4hi_v4hi, qmachu, TARGET_PLUS_QMACW) +DEF_BUILTIN (QMPYH, 2, long_ftype_v4hi_v4hi, qmpyh, TARGET_PLUS_QMACW) +DEF_BUILTIN (QMPYHU, 2, long_ftype_v4hi_v4hi, qmpyhu, TARGET_PLUS_QMACW) + +DEF_BUILTIN (DMACH, 2, int_ftype_v2hi_v2hi, dmach, TARGET_PLUS_DMPY) +DEF_BUILTIN (DMACHU, 2, int_ftype_v2hi_v2hi, dmachu, TARGET_PLUS_DMPY) +DEF_BUILTIN (DMPYH, 2, int_ftype_v2hi_v2hi, dmpyh, TARGET_PLUS_DMPY) +DEF_BUILTIN (DMPYHU, 2, int_ftype_v2hi_v2hi, dmpyhu, TARGET_PLUS_DMPY) + +DEF_BUILTIN (DMACWH, 2, long_ftype_v2si_v2hi, dmacwh, TARGET_PLUS_QMACW) +DEF_BUILTIN (DMACWHU, 2, long_ftype_v2si_v2hi, dmacwhu, TARGET_PLUS_QMACW) + +DEF_BUILTIN (VMAC2H, 2, v2si_ftype_v2hi_v2hi, vmac2h, TARGET_PLUS_MACD) +DEF_BUILTIN (VMAC2HU, 2, v2si_ftype_v2hi_v2hi, vmac2hu, TARGET_PLUS_MACD) +DEF_BUILTIN (VMPY2H, 2, v2si_ftype_v2hi_v2hi, vmpy2h, TARGET_PLUS_MACD) +DEF_BUILTIN (VMPY2HU, 2, v2si_ftype_v2hi_v2hi, vmpy2hu, TARGET_PLUS_MACD) + +/* Combined add/sub HS SIMD instructions. 
*/ +DEF_BUILTIN (VADDSUB2H, 2, v2hi_ftype_v2hi_v2hi, addsubv2hi3, TARGET_PLUS_DMPY) +DEF_BUILTIN (VSUBADD2H, 2, v2hi_ftype_v2hi_v2hi, subaddv2hi3, TARGET_PLUS_DMPY) +DEF_BUILTIN (VADDSUB, 2, v2si_ftype_v2si_v2si, addsubv2si3, TARGET_PLUS_QMACW) +DEF_BUILTIN (VSUBADD, 2, v2si_ftype_v2si_v2si, subaddv2si3, TARGET_PLUS_QMACW) +DEF_BUILTIN (VADDSUB4H, 2, v4hi_ftype_v4hi_v4hi, addsubv4hi3, TARGET_PLUS_QMACW) +DEF_BUILTIN (VSUBADD4H, 2, v4hi_ftype_v4hi_v4hi, subaddv4hi3, TARGET_PLUS_QMACW) diff --git a/gcc/config/arc/simdext.md b/gcc/config/arc/simdext.md index 9fd9d62e048341ccc673dbba1c9613f12636ff77..51869e367726522dc70b2191df1f9228455239cd 100644 --- a/gcc/config/arc/simdext.md +++ b/gcc/config/arc/simdext.md @@ -1288,3 +1288,574 @@ [(set_attr "type" "simd_vcontrol") (set_attr "length" "4") (set_attr "cond" "nocond")]) + +;; New ARCv2 SIMD extensions + +;;64-bit vectors of halfwords and words +(define_mode_iterator VWH [V4HI V2SI]) + +;;double element vectors +(define_mode_iterator VDV [V2HI V2SI]) +(define_mode_attr V_addsub [(V2HI "HI") (V2SI "SI")]) +(define_mode_attr V_addsub_suffix [(V2HI "2h") (V2SI "")]) + +;;all vectors +(define_mode_iterator VCT [V2HI V4HI V2SI]) +(define_mode_attr V_suffix [(V2HI "2h") (V4HI "4h") (V2SI "2")]) + +;; Widening operations. +(define_code_iterator SE [sign_extend zero_extend]) +(define_code_attr V_US [(sign_extend "s") (zero_extend "u")]) +(define_code_attr V_US_suffix [(sign_extend "") (zero_extend "u")]) + + +;; Move patterns +(define_expand "movv2hi" + [(set (match_operand:V2HI 0 "move_dest_operand" "") + (match_operand:V2HI 1 "general_operand" ""))] + "" + "{ + if (prepare_move_operands (operands, V2HImode)) + DONE; + }") + +(define_insn_and_split "*movv2hi_insn" + [(set (match_operand:V2HI 0 "nonimmediate_operand" "=r,r,r,m") + (match_operand:V2HI 1 "general_operand" "i,r,m,r"))] + "(register_operand (operands[0], V2HImode) + || register_operand (operands[1], V2HImode))" + "@ + # + mov%? 
%0, %1 + ld%U1%V1 %0,%1 + st%U0%V0 %1,%0" + "reload_completed && GET_CODE (operands[1]) == CONST_VECTOR" + [(set (match_dup 0) (match_dup 2))] + { + HOST_WIDE_INT intval = INTVAL (XVECEXP (operands[1], 0, 1)) << 16; + intval |= INTVAL (XVECEXP (operands[1], 0, 0)) & 0xFFFF; + + operands[0] = gen_rtx_REG (SImode, REGNO (operands[0])); + operands[2] = GEN_INT (trunc_int_for_mode (intval, SImode)); + } + [(set_attr "type" "move,move,load,store") + (set_attr "predicable" "yes,yes,no,no") + (set_attr "iscompact" "false,false,false,false") + ]) + +(define_expand "movmisalignv2hi" + [(set (match_operand:V2HI 0 "general_operand" "") + (match_operand:V2HI 1 "general_operand" ""))] + "" +{ + if (!register_operand (operands[0], V2HImode) + && !register_operand (operands[1], V2HImode)) + operands[1] = force_reg (V2HImode, operands[1]); +}) + +(define_expand "mov<mode>" + [(set (match_operand:VWH 0 "move_dest_operand" "") + (match_operand:VWH 1 "general_operand" ""))] + "" + "{ + if (GET_CODE (operands[0]) == MEM) + operands[1] = force_reg (<MODE>mode, operands[1]); + }") + +(define_insn_and_split "*mov<mode>_insn" + [(set (match_operand:VWH 0 "move_dest_operand" "=r,r,r,m") + (match_operand:VWH 1 "general_operand" "i,r,m,r"))] + "TARGET_PLUS_QMACW + && (register_operand (operands[0], <MODE>mode) + || register_operand (operands[1], <MODE>mode))" + "* +{ + switch (which_alternative) + { + default: + return \"#\"; + + case 1: + return \"vadd2 %0, %1, 0\"; + + case 2: + if (TARGET_LL64) + return \"ldd%U1%V1 %0,%1\"; + return \"#\"; + + case 3: + if (TARGET_LL64) + return \"std%U0%V0 %1,%0\"; + return \"#\"; + } +}" + "reload_completed" + [(const_int 0)] + { + arc_split_move (operands); + DONE; + } + [(set_attr "type" "move,move,load,store") + (set_attr "predicable" "yes,no,no,no") + (set_attr "iscompact" "false,false,false,false") + ]) + +(define_expand "movmisalign<mode>" + [(set (match_operand:VWH 0 "general_operand" "") + (match_operand:VWH 1 "general_operand" ""))] + "" +{ + 
if (!register_operand (operands[0], <MODE>mode) + && !register_operand (operands[1], <MODE>mode)) + operands[1] = force_reg (<MODE>mode, operands[1]); +}) + +(define_insn "bswapv2hi2" + [(set (match_operand:V2HI 0 "register_operand" "=r,r") + (bswap:V2HI (match_operand:V2HI 1 "nonmemory_operand" "r,i")))] + "TARGET_V2 && TARGET_SWAP" + "swape %0, %1" + [(set_attr "length" "4,8") + (set_attr "type" "two_cycle_core")]) + +;; Simple arithmetic insns +(define_insn "add<mode>3" + [(set (match_operand:VCT 0 "register_operand" "=r,r") + (plus:VCT (match_operand:VCT 1 "register_operand" "0,r") + (match_operand:VCT 2 "register_operand" "r,r")))] + "TARGET_PLUS_DMPY" + "vadd<V_suffix>%? %0, %1, %2" + [(set_attr "length" "4") + (set_attr "type" "multi") + (set_attr "predicable" "yes,no") + (set_attr "cond" "canuse,nocond")]) + +(define_insn "sub<mode>3" + [(set (match_operand:VCT 0 "register_operand" "=r,r") + (minus:VCT (match_operand:VCT 1 "register_operand" "0,r") + (match_operand:VCT 2 "register_operand" "r,r")))] + "TARGET_PLUS_DMPY" + "vsub<V_suffix>%? %0, %1, %2" + [(set_attr "length" "4") + (set_attr "type" "multi") + (set_attr "predicable" "yes,no") + (set_attr "cond" "canuse,nocond")]) + +;; Combined arithmetic ops +(define_insn "addsub<mode>3" + [(set (match_operand:VDV 0 "register_operand" "=r,r") + (vec_concat:VDV + (plus:<V_addsub> (vec_select:<V_addsub> (match_operand:VDV 1 "register_operand" "0,r") + (parallel [(const_int 0)])) + (vec_select:<V_addsub> (match_operand:VDV 2 "register_operand" "r,r") + (parallel [(const_int 0)]))) + (minus:<V_addsub> (vec_select:<V_addsub> (match_dup 1) (parallel [(const_int 1)])) + (vec_select:<V_addsub> (match_dup 2) (parallel [(const_int 1)])))))] + "TARGET_PLUS_DMPY" + "vaddsub<V_addsub_suffix>%? 
%0, %1, %2" + [(set_attr "length" "4") + (set_attr "type" "multi") + (set_attr "predicable" "yes,no") + (set_attr "cond" "canuse,nocond")]) + +(define_insn "subadd<mode>3" + [(set (match_operand:VDV 0 "register_operand" "=r,r") + (vec_concat:VDV + (minus:<V_addsub> (vec_select:<V_addsub> (match_operand:VDV 1 "register_operand" "0,r") + (parallel [(const_int 0)])) + (vec_select:<V_addsub> (match_operand:VDV 2 "register_operand" "r,r") + (parallel [(const_int 0)]))) + (plus:<V_addsub> (vec_select:<V_addsub> (match_dup 1) (parallel [(const_int 1)])) + (vec_select:<V_addsub> (match_dup 2) (parallel [(const_int 1)])))))] + "TARGET_PLUS_DMPY" + "vsubadd<V_addsub_suffix>%? %0, %1, %2" + [(set_attr "length" "4") + (set_attr "type" "multi") + (set_attr "predicable" "yes,no") + (set_attr "cond" "canuse,nocond")]) + +(define_insn "addsubv4hi3" + [(set (match_operand:V4HI 0 "even_register_operand" "=r,r") + (vec_concat:V4HI + (vec_concat:V2HI + (plus:HI (vec_select:HI (match_operand:V4HI 1 "even_register_operand" "0,r") + (parallel [(const_int 0)])) + (vec_select:HI (match_operand:V4HI 2 "even_register_operand" "r,r") + (parallel [(const_int 0)]))) + (minus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 1)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))) + (vec_concat:V2HI + (plus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))) + (minus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 3)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 3)])))) + ))] + "TARGET_PLUS_QMACW" + "vaddsub4h%? 
%0, %1, %2" + [(set_attr "length" "4") + (set_attr "type" "multi") + (set_attr "predicable" "yes,no") + (set_attr "cond" "canuse,nocond")]) + +(define_insn "subaddv4hi3" + [(set (match_operand:V4HI 0 "even_register_operand" "=r,r") + (vec_concat:V4HI + (vec_concat:V2HI + (minus:HI (vec_select:HI (match_operand:V4HI 1 "even_register_operand" "0,r") + (parallel [(const_int 0)])) + (vec_select:HI (match_operand:V4HI 2 "even_register_operand" "r,r") + (parallel [(const_int 0)]))) + (plus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 1)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))) + (vec_concat:V2HI + (minus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 2)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))) + (plus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 3)])) + (vec_select:HI (match_dup 2) (parallel [(const_int 3)])))) + ))] + "TARGET_PLUS_QMACW" + "vsubadd4h%? %0, %1, %2" + [(set_attr "length" "4") + (set_attr "type" "multi") + (set_attr "predicable" "yes,no") + (set_attr "cond" "canuse,nocond")]) + +;; Multiplication +(define_insn "dmpyh<V_US_suffix>" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (plus:SI + (mult:SI + (SE:SI + (vec_select:HI (match_operand:V2HI 1 "register_operand" "0,r") + (parallel [(const_int 0)]))) + (SE:SI + (vec_select:HI (match_operand:V2HI 2 "register_operand" "r,r") + (parallel [(const_int 0)])))) + (mult:SI + (SE:SI (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (SE:SI (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))) + (set (reg:DI ARCV2_ACC) + (zero_extend:DI + (plus:SI + (mult:SI + (SE:SI (vec_select:HI (match_dup 1) (parallel [(const_int 0)]))) + (SE:SI (vec_select:HI (match_dup 2) (parallel [(const_int 0)])))) + (mult:SI + (SE:SI (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))) + (SE:SI (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))))] + "TARGET_PLUS_DMPY" + "dmpy<V_US_suffix>%? 
%0, %1, %2" + [(set_attr "length" "4") + (set_attr "type" "multi") + (set_attr "predicable" "yes,no") + (set_attr "cond" "canuse,nocond")]) + +;; We can use dmac as well here. To be investigated which version +;; brings more. +(define_expand "sdot_prodv2hi" + [(match_operand:SI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "") + (match_operand:SI 3 "register_operand" "")] + "TARGET_PLUS_DMPY" +{ + rtx t = gen_reg_rtx (SImode); + emit_insn (gen_dmpyh (t, operands[1], operands[2])); + emit_insn (gen_addsi3 (operands[0], operands[3], t)); + DONE; +}) + +(define_expand "udot_prodv2hi" + [(match_operand:SI 0 "register_operand" "") + (match_operand:V2HI 1 "register_operand" "") + (match_operand:V2HI 2 "register_operand" "") + (match_operand:SI 3 "register_operand" "")] + "TARGET_PLUS_DMPY" +{ + rtx t = gen_reg_rtx (SImode); + emit_insn (gen_dmpyhu (t, operands[1], operands[2])); + emit_insn (gen_addsi3 (operands[0], operands[3], t)); + DONE; +}) + +(define_insn "arc_vec_<V_US>mult_lo_v4hi" + [(set (match_operand:V2SI 0 "even_register_operand" "=r,r") + (mult:V2SI (SE:V2SI (vec_select:V2HI + (match_operand:V4HI 1 "even_register_operand" "0,r") + (parallel [(const_int 0) (const_int 1)]))) + (SE:V2SI (vec_select:V2HI + (match_operand:V4HI 2 "even_register_operand" "r,r") + (parallel [(const_int 0) (const_int 1)]))))) + (set (reg:V2SI ARCV2_ACC) + (mult:V2SI (SE:V2SI (vec_select:V2HI (match_dup 1) + (parallel [(const_int 0) (const_int 1)]))) + (SE:V2SI (vec_select:V2HI (match_dup 2) + (parallel [(const_int 0) (const_int 1)]))))) + ] + "TARGET_PLUS_MACD" + "vmpy2h<V_US_suffix>%? 
%0, %1, %2" + [(set_attr "length" "4") + (set_attr "type" "multi") + (set_attr "predicable" "yes,no") + (set_attr "cond" "canuse,nocond")]) + +(define_insn "arc_vec_<V_US>multacc_lo_v4hi" + [(set (reg:V2SI ARCV2_ACC) + (mult:V2SI (SE:V2SI (vec_select:V2HI + (match_operand:V4HI 0 "even_register_operand" "r") + (parallel [(const_int 0) (const_int 1)]))) + (SE:V2SI (vec_select:V2HI + (match_operand:V4HI 1 "even_register_operand" "r") + (parallel [(const_int 0) (const_int 1)]))))) + ] + "TARGET_PLUS_MACD" + "vmpy2h<V_US_suffix>%? 0, %0, %1" + [(set_attr "length" "4") + (set_attr "type" "multi") + (set_attr "predicable" "no") + (set_attr "cond" "nocond")]) + +(define_expand "vec_widen_<V_US>mult_lo_v4hi" + [(set (match_operand:V2SI 0 "even_register_operand" "") + (mult:V2SI (SE:V2SI (vec_select:V2HI + (match_operand:V4HI 1 "even_register_operand" "") + (parallel [(const_int 0) (const_int 1)]))) + (SE:V2SI (vec_select:V2HI + (match_operand:V4HI 2 "even_register_operand" "") + (parallel [(const_int 0) (const_int 1)])))))] + "TARGET_PLUS_QMACW" + { + emit_insn (gen_arc_vec_<V_US>mult_lo_v4hi (operands[0], + operands[1], + operands[2])); + DONE; + } +) + +(define_insn "arc_vec_<V_US>mult_hi_v4hi" + [(set (match_operand:V2SI 0 "even_register_operand" "=r,r") + (mult:V2SI (SE:V2SI (vec_select:V2HI + (match_operand:V4HI 1 "even_register_operand" "0,r") + (parallel [(const_int 2) (const_int 3)]))) + (SE:V2SI (vec_select:V2HI + (match_operand:V4HI 2 "even_register_operand" "r,r") + (parallel [(const_int 2) (const_int 3)]))))) + (set (reg:V2SI ARCV2_ACC) + (mult:V2SI (SE:V2SI (vec_select:V2HI (match_dup 1) + (parallel [(const_int 2) (const_int 3)]))) + (SE:V2SI (vec_select:V2HI (match_dup 2) + (parallel [(const_int 2) (const_int 3)]))))) + ] + "TARGET_PLUS_QMACW" + "vmpy2h<V_US_suffix>%? 
%0, %R1, %R2" + [(set_attr "length" "4") + (set_attr "type" "multi") + (set_attr "predicable" "yes,no") + (set_attr "cond" "canuse,nocond")]) + +(define_expand "vec_widen_<V_US>mult_hi_v4hi" + [(set (match_operand:V2SI 0 "even_register_operand" "") + (mult:V2SI (SE:V2SI (vec_select:V2HI + (match_operand:V4HI 1 "even_register_operand" "") + (parallel [(const_int 2) (const_int 3)]))) + (SE:V2SI (vec_select:V2HI + (match_operand:V4HI 2 "even_register_operand" "") + (parallel [(const_int 2) (const_int 3)])))))] + "TARGET_PLUS_QMACW" + { + emit_insn (gen_arc_vec_<V_US>mult_hi_v4hi (operands[0], + operands[1], + operands[2])); + DONE; + } +) + +(define_insn "arc_vec_<V_US>mac_hi_v4hi" + [(set (match_operand:V2SI 0 "even_register_operand" "=r,r") + (plus:V2SI + (reg:V2SI ARCV2_ACC) + (mult:V2SI (SE:V2SI (vec_select:V2HI + (match_operand:V4HI 1 "even_register_operand" "0,r") + (parallel [(const_int 2) (const_int 3)]))) + (SE:V2SI (vec_select:V2HI + (match_operand:V4HI 2 "even_register_operand" "r,r") + (parallel [(const_int 2) (const_int 3)])))))) + (set (reg:V2SI ARCV2_ACC) + (plus:V2SI + (reg:V2SI ARCV2_ACC) + (mult:V2SI (SE:V2SI (vec_select:V2HI (match_dup 1) + (parallel [(const_int 2) (const_int 3)]))) + (SE:V2SI (vec_select:V2HI (match_dup 2) + (parallel [(const_int 2) (const_int 3)])))))) + ] + "TARGET_PLUS_MACD" + "vmac2h<V_US_suffix>%? %0, %R1, %R2" + [(set_attr "length" "4") + (set_attr "type" "multi") + (set_attr "predicable" "yes,no") + (set_attr "cond" "canuse,nocond")]) + +;; Builtins +(define_insn "dmach" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (unspec:SI [(match_operand:V2HI 1 "register_operand" "0,r") + (match_operand:V2HI 2 "register_operand" "r,r") + (reg:DI ARCV2_ACC)] + UNSPEC_ARC_DMACH)) + (clobber (reg:DI ARCV2_ACC))] + "TARGET_PLUS_DMPY" + "dmach%? 
%0, %1, %2" + [(set_attr "length" "4") + (set_attr "type" "multi") + (set_attr "predicable" "yes,no") + (set_attr "cond" "canuse,nocond")]) + +(define_insn "dmachu" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (unspec:SI [(match_operand:V2HI 1 "register_operand" "0,r") + (match_operand:V2HI 2 "register_operand" "r,r") + (reg:DI ARCV2_ACC)] + UNSPEC_ARC_DMACHU)) + (clobber (reg:DI ARCV2_ACC))] + "TARGET_PLUS_DMPY" + "dmachu%? %0, %1, %2" + [(set_attr "length" "4") + (set_attr "type" "multi") + (set_attr "predicable" "yes,no") + (set_attr "cond" "canuse,nocond")]) + +(define_insn "dmacwh" + [(set (match_operand:DI 0 "even_register_operand" "=r,r") + (unspec:DI [(match_operand:V2SI 1 "even_register_operand" "0,r") + (match_operand:V2HI 2 "register_operand" "r,r") + (reg:DI ARCV2_ACC)] + UNSPEC_ARC_DMACWH)) + (clobber (reg:DI ARCV2_ACC))] + "TARGET_PLUS_QMACW" + "dmacwh%? %0, %1, %2" + [(set_attr "length" "4") + (set_attr "type" "multi") + (set_attr "predicable" "yes,no") + (set_attr "cond" "canuse,nocond")]) + +(define_insn "dmacwhu" + [(set (match_operand:DI 0 "even_register_operand" "=r,r") + (unspec:DI [(match_operand:V2SI 1 "even_register_operand" "0,r") + (match_operand:V2HI 2 "register_operand" "r,r") + (reg:DI ARCV2_ACC)] + UNSPEC_ARC_DMACWHU)) + (clobber (reg:DI ARCV2_ACC))] + "TARGET_PLUS_QMACW" + "dmacwhu%? %0, %1, %2" + [(set_attr "length" "4") + (set_attr "type" "multi") + (set_attr "predicable" "yes,no") + (set_attr "cond" "canuse,nocond")]) + +(define_insn "vmac2h" + [(set (match_operand:V2SI 0 "even_register_operand" "=r,r") + (unspec:V2SI [(match_operand:V2HI 1 "register_operand" "0,r") + (match_operand:V2HI 2 "register_operand" "r,r") + (reg:DI ARCV2_ACC)] + UNSPEC_ARC_VMAC2H)) + (clobber (reg:DI ARCV2_ACC))] + "TARGET_PLUS_MACD" + "vmac2h%? 
%0, %1, %2" + [(set_attr "length" "4") + (set_attr "type" "multi") + (set_attr "predicable" "yes,no") + (set_attr "cond" "canuse,nocond")]) + +(define_insn "vmac2hu" + [(set (match_operand:V2SI 0 "even_register_operand" "=r,r") + (unspec:V2SI [(match_operand:V2HI 1 "register_operand" "0,r") + (match_operand:V2HI 2 "register_operand" "r,r") + (reg:DI ARCV2_ACC)] + UNSPEC_ARC_VMAC2HU)) + (clobber (reg:DI ARCV2_ACC))] + "TARGET_PLUS_MACD" + "vmac2hu%? %0, %1, %2" + [(set_attr "length" "4") + (set_attr "type" "multi") + (set_attr "predicable" "yes,no") + (set_attr "cond" "canuse,nocond")]) + +(define_insn "vmpy2h" + [(set (match_operand:V2SI 0 "even_register_operand" "=r,r") + (unspec:V2SI [(match_operand:V2HI 1 "register_operand" "0,r") + (match_operand:V2HI 2 "register_operand" "r,r")] + UNSPEC_ARC_VMPY2H)) + (clobber (reg:DI ARCV2_ACC))] + "TARGET_PLUS_MACD" + "vmpy2h%? %0, %1, %2" + [(set_attr "length" "4") + (set_attr "type" "multi") + (set_attr "predicable" "yes,no") + (set_attr "cond" "canuse,nocond")]) + +(define_insn "vmpy2hu" + [(set (match_operand:V2SI 0 "even_register_operand" "=r,r") + (unspec:V2SI [(match_operand:V2HI 1 "register_operand" "0,r") + (match_operand:V2HI 2 "register_operand" "r,r")] + UNSPEC_ARC_VMPY2HU)) + (clobber (reg:DI ARCV2_ACC))] + "TARGET_PLUS_MACD" + "vmpy2hu%? %0, %1, %2" + [(set_attr "length" "4") + (set_attr "type" "multi") + (set_attr "predicable" "yes,no") + (set_attr "cond" "canuse,nocond")]) + +(define_insn "qmach" + [(set (match_operand:DI 0 "even_register_operand" "=r,r") + (unspec:DI [(match_operand:V4HI 1 "even_register_operand" "0,r") + (match_operand:V4HI 2 "even_register_operand" "r,r") + (reg:DI ARCV2_ACC)] + UNSPEC_ARC_QMACH)) + (clobber (reg:DI ARCV2_ACC))] + "TARGET_PLUS_QMACW" + "qmach%? 
%0, %1, %2" + [(set_attr "length" "4") + (set_attr "type" "multi") + (set_attr "predicable" "yes,no") + (set_attr "cond" "canuse,nocond")]) + +(define_insn "qmachu" + [(set (match_operand:DI 0 "even_register_operand" "=r,r") + (unspec:DI [(match_operand:V4HI 1 "even_register_operand" "0,r") + (match_operand:V4HI 2 "even_register_operand" "r,r") + (reg:DI ARCV2_ACC)] + UNSPEC_ARC_QMACHU)) + (clobber (reg:DI ARCV2_ACC))] + "TARGET_PLUS_QMACW" + "qmachu%? %0, %1, %2" + [(set_attr "length" "4") + (set_attr "type" "multi") + (set_attr "predicable" "yes,no") + (set_attr "cond" "canuse,nocond")]) + +(define_insn "qmpyh" + [(set (match_operand:DI 0 "even_register_operand" "=r,r") + (unspec:DI [(match_operand:V4HI 1 "even_register_operand" "0,r") + (match_operand:V4HI 2 "even_register_operand" "r,r")] + UNSPEC_ARC_QMPYH)) + (clobber (reg:DI ARCV2_ACC))] + "TARGET_PLUS_QMACW" + "qmpyh%? %0, %1, %2" + [(set_attr "length" "4") + (set_attr "type" "multi") + (set_attr "predicable" "yes,no") + (set_attr "cond" "canuse,nocond")]) + +(define_insn "qmpyhu" + [(set (match_operand:DI 0 "even_register_operand" "=r,r") + (unspec:DI [(match_operand:V4HI 1 "even_register_operand" "0,r") + (match_operand:V4HI 2 "even_register_operand" "r,r")] + UNSPEC_ARC_QMPYHU)) + (clobber (reg:DI ARCV2_ACC))] + "TARGET_PLUS_QMACW" + "qmpyhu%? 
%0, %1, %2" + [(set_attr "length" "4") + (set_attr "type" "multi") + (set_attr "predicable" "yes,no") + (set_attr "cond" "canuse,nocond")]) diff --git a/gcc/testsuite/gcc.target/arc/builtin_simdarc.c b/gcc/testsuite/gcc.target/arc/builtin_simdarc.c new file mode 100644 index 0000000000000000000000000000000000000000..68aae40ca58e99f3d2faed72ad6154c89e5d363c --- /dev/null +++ b/gcc/testsuite/gcc.target/arc/builtin_simdarc.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-options "-mcpu=archs -O2 -Werror-implicit-function-declaration -mmpy-option=9" } */ + +#define STEST(name, rettype, op1type, op2type) \ + rettype test_ ## name \ + (op1type a, op2type b) \ + { \ + return __builtin_arc_ ## name (a, b); \ + } + +typedef short v2hi __attribute__ ((vector_size (4))); +typedef short v4hi __attribute__ ((vector_size (8))); +typedef int v2si __attribute__ ((vector_size (8))); + +STEST (qmach, long long, v4hi, v4hi) +STEST (qmachu, long long, v4hi, v4hi) +STEST (qmpyh, long long, v4hi, v4hi) +STEST (qmpyhu, long long, v4hi, v4hi) + +STEST (dmach, int, v2hi, v2hi) +STEST (dmachu, int, v2hi, v2hi) +STEST (dmpyh, int, v2hi, v2hi) +STEST (dmpyhu, int, v2hi, v2hi) + +STEST (dmacwh, long long, v2si, v2hi) +STEST (dmacwhu, long long, v2si, v2hi) + +STEST (vmac2h, v2si, v2hi, v2hi) +STEST (vmac2hu, v2si, v2hi, v2hi) +STEST (vmpy2h, v2si, v2hi, v2hi) +STEST (vmpy2hu, v2si, v2hi, v2hi) + +STEST (vaddsub2h, v2hi, v2hi, v2hi) +STEST (vsubadd2h, v2hi, v2hi, v2hi) +STEST (vaddsub, v2si, v2si, v2si) +STEST (vsubadd, v2si, v2si, v2si) +STEST (vaddsub4h, v4hi, v4hi, v4hi) +STEST (vsubadd4h, v4hi, v4hi, v4hi)