From 00c072ae51c132e346eab0c6f8c176542efbcd5a Mon Sep 17 00:00:00 2001
From: Claudiu Zissulescu <claziss@synopsys.com>
Date: Thu, 28 Apr 2016 11:53:13 +0200
Subject: [PATCH] [ARC] Add SIMD extensions for ARC HS

gcc/
2016-04-28  Claudiu Zissulescu  <claziss@synopsys.com>

	* config/arc/arc.c (arc_vector_mode_supported_p): Add support for
	the new ARC HS SIMD instructions.
	(arc_preferred_simd_mode): New function.
	(arc_autovectorize_vector_sizes): Likewise.
	(TARGET_VECTORIZE_PREFERRED_SIMD_MODE)
	(TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES): Define.
	(arc_init_reg_tables): Accept new ARC HS SIMD modes.
	(arc_init_builtins): Add new SIMD builtin types.
	(arc_split_move): Handle 64 bit vector moves.
	* config/arc/arc.h (TARGET_PLUS_DMPY, TARGET_PLUS_MACD)
	(TARGET_PLUS_QMACW): Define.
	* config/arc/builtins.def (QMACH, QMACHU, QMPYH, QMPYHU, DMACH)
	(DMACHU, DMPYH, DMPYHU, DMACWH, DMACWHU, VMAC2H, VMAC2HU, VMPY2H)
	(VMPY2HU, VADDSUB2H, VSUBADD2H, VADDSUB, VSUBADD, VADDSUB4H)
	(VSUBADD4H): New builtins.
	* config/arc/simdext.md: Add new ARC HS SIMD instructions.
	* testsuite/gcc.target/arc/builtin_simdarc.c: New file.

From-SVN: r235551
---
 gcc/ChangeLog                                 |  20 +
 gcc/config/arc/arc.c                          | 112 +++-
 gcc/config/arc/arc.h                          |   6 +
 gcc/config/arc/builtins.def                   |  27 +
 gcc/config/arc/simdext.md                     | 571 ++++++++++++++++++
 .../gcc.target/arc/builtin_simdarc.c          |  38 ++
 6 files changed, 767 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arc/builtin_simdarc.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index a41b8e0841cd..f557c9ff4537 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,23 @@
+2016-04-28  Claudiu Zissulescu  <claziss@synopsys.com>
+
+	* config/arc/arc.c (arc_vector_mode_supported_p): Add support for
+	the new ARC HS SIMD instructions.
+	(arc_preferred_simd_mode): New function.
+	(arc_autovectorize_vector_sizes): Likewise.
+	(TARGET_VECTORIZE_PREFERRED_SIMD_MODE)
+	(TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES): Define.
+	(arc_init_reg_tables): Accept new ARC HS SIMD modes.
+	(arc_init_builtins): Add new SIMD builtin types.
+	(arc_split_move): Handle 64 bit vector moves.
+	* config/arc/arc.h (TARGET_PLUS_DMPY, TARGET_PLUS_MACD)
+	(TARGET_PLUS_QMACW): Define.
+	* config/arc/builtins.def (QMACH, QMACHU, QMPYH, QMPYHU, DMACH)
+	(DMACHU, DMPYH, DMPYHU, DMACWH, DMACWHU, VMAC2H, VMAC2HU, VMPY2H)
+	(VMPY2HU, VADDSUB2H, VSUBADD2H, VADDSUB, VSUBADD, VADDSUB4H)
+	(VSUBADD4H): New builtins.
+	* config/arc/simdext.md: Add new ARC HS SIMD instructions.
+	* testsuite/gcc.target/arc/builtin_simdarc.c: New file.
+
 2016-04-28  Eduard Sanou  <dhole@openmailbox.org>
 	    Matthias Klose  <doko@debian.org>
 
diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index d60db502ef85..d120946a5f2f 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -247,16 +247,47 @@ static bool arc_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
 static bool
 arc_vector_mode_supported_p (machine_mode mode)
 {
-  if (!TARGET_SIMD_SET)
-    return false;
+  switch (mode)
+    {
+    case V2HImode:
+      return TARGET_PLUS_DMPY;
+    case V4HImode:
+    case V2SImode:
+      return TARGET_PLUS_QMACW;
+    case V4SImode:
+    case V8HImode:
+      return TARGET_SIMD_SET;
 
-  if ((mode == V4SImode)
-      || (mode == V8HImode))
-    return true;
+    default:
+      return false;
+    }
+}
 
-  return false;
+/* Implements TARGET_VECTORIZE_PREFERRED_SIMD_MODE; word_mode if no SIMD mode.  */
+
+static enum machine_mode
+arc_preferred_simd_mode (enum machine_mode mode)
+{
+  switch (mode)
+    {
+    case HImode:
+      return TARGET_PLUS_QMACW ? V4HImode : V2HImode;
+    case SImode:
+      return V2SImode;
+
+    default:
+      return word_mode;
+    }
 }
 
+/* Implements target hook TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES:
+   with QMACW offer vector sizes 8 and 4, otherwise return 0.  */
+
+static unsigned int
+arc_autovectorize_vector_sizes (void)
+{
+  return TARGET_PLUS_QMACW ? (8 | 4) : 0;
+}
 
 /* TARGET_PRESERVE_RELOAD_P is still awaiting patch re-evaluation / review.  */
 static bool arc_preserve_reload_p (rtx in) ATTRIBUTE_UNUSED;
@@ -345,6 +376,12 @@ static void arc_finalize_pic (void);
 #undef TARGET_VECTOR_MODE_SUPPORTED_P
 #define TARGET_VECTOR_MODE_SUPPORTED_P arc_vector_mode_supported_p
 
+#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
+#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arc_preferred_simd_mode
+
+#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
+#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES arc_autovectorize_vector_sizes
+
 #undef TARGET_CAN_USE_DOLOOP_P
 #define TARGET_CAN_USE_DOLOOP_P arc_can_use_doloop_p
 
@@ -1214,7 +1251,12 @@ arc_init_reg_tables (void)
 	    arc_mode_class[i] = 0;
 	  break;
 	case MODE_VECTOR_INT:
-	  arc_mode_class [i] = (1<< (int) V_MODE);
+	  if (GET_MODE_SIZE (m) == 4)
+	    arc_mode_class[i] = (1 << (int) S_MODE);
+	  else if (GET_MODE_SIZE (m) == 8)
+	    arc_mode_class[i] = (1 << (int) D_MODE);
+	  else
+	    arc_mode_class[i] = (1 << (int) V_MODE);
 	  break;
 	case MODE_CC:
 	default:
@@ -5277,6 +5319,15 @@ arc_builtin_decl (unsigned id, bool initialize_p ATTRIBUTE_UNUSED)
 static void
 arc_init_builtins (void)
 {
+  tree V4HI_type_node;
+  tree V2SI_type_node;
+  tree V2HI_type_node;
+
+  /* Vector types based on HS SIMD elements.  */
+  V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
+  V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
+  V2HI_type_node = build_vector_type_for_mode (intHI_type_node, V2HImode);
+
   tree pcvoid_type_node
     = build_pointer_type (build_qualified_type (void_type_node,
 						TYPE_QUAL_CONST));
@@ -5341,6 +5392,28 @@ arc_init_builtins (void)
   tree v8hi_ftype_v8hi
     = build_function_type_list (V8HI_type_node, V8HI_type_node,
 				NULL_TREE);
+  /* ARCv2 SIMD types.  */
+  tree long_ftype_v4hi_v4hi
+    = build_function_type_list (long_long_integer_type_node,
+				V4HI_type_node,	V4HI_type_node, NULL_TREE);
+  tree int_ftype_v2hi_v2hi
+    = build_function_type_list (integer_type_node,
+				V2HI_type_node, V2HI_type_node, NULL_TREE);
+  tree v2si_ftype_v2hi_v2hi
+    = build_function_type_list (V2SI_type_node,
+				V2HI_type_node, V2HI_type_node, NULL_TREE);
+  tree v2hi_ftype_v2hi_v2hi
+    = build_function_type_list (V2HI_type_node,
+				V2HI_type_node, V2HI_type_node, NULL_TREE);
+  tree v2si_ftype_v2si_v2si
+    = build_function_type_list (V2SI_type_node,
+				V2SI_type_node, V2SI_type_node, NULL_TREE);
+  tree v4hi_ftype_v4hi_v4hi
+    = build_function_type_list (V4HI_type_node,
+				V4HI_type_node, V4HI_type_node, NULL_TREE);
+  tree long_ftype_v2si_v2hi
+    = build_function_type_list (long_long_integer_type_node,
+				V2SI_type_node, V2HI_type_node, NULL_TREE);
 
   /* Add the builtins.  */
 #define DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, MASK)			\
@@ -8706,6 +8779,31 @@ arc_split_move (rtx *operands)
       return;
     }
 
+  if (TARGET_PLUS_QMACW
+      && GET_CODE (operands[1]) == CONST_VECTOR)
+    {
+      HOST_WIDE_INT intval0, intval1;
+      if (GET_MODE (operands[1]) == V2SImode)
+	{
+	  intval0 = INTVAL (XVECEXP (operands[1], 0, 0));
+	  intval1 = INTVAL (XVECEXP (operands[1], 0, 1));
+	}
+      else
+	{
+	  intval1  = INTVAL (XVECEXP (operands[1], 0, 3)) << 16;
+	  intval1 |= INTVAL (XVECEXP (operands[1], 0, 2)) & 0xFFFF;
+	  intval0  = INTVAL (XVECEXP (operands[1], 0, 1)) << 16;
+	  intval0 |= INTVAL (XVECEXP (operands[1], 0, 0)) & 0xFFFF;
+	}
+      xop[0] = gen_rtx_REG (SImode, REGNO (operands[0]));
+      xop[3] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
+      xop[2] = GEN_INT (trunc_int_for_mode (intval0, SImode));
+      xop[1] = GEN_INT (trunc_int_for_mode (intval1, SImode));
+      emit_move_insn (xop[0], xop[2]);
+      emit_move_insn (xop[3], xop[1]);
+      return;
+    }
+
   for (i = 0; i < 2; i++)
     {
       if (MEM_P (operands[i]) && auto_inc_p (XEXP (operands[i], 0)))
diff --git a/gcc/config/arc/arc.h b/gcc/config/arc/arc.h
index 1c2a38d4acfc..5100a5b8f821 100644
--- a/gcc/config/arc/arc.h
+++ b/gcc/config/arc/arc.h
@@ -1724,6 +1724,12 @@ enum
 /* Any multiplication feature macro.  */
 #define TARGET_ANY_MPY						\
   (TARGET_MPY || TARGET_MUL64_SET || TARGET_MULMAC_32BY16_SET)
+/* PLUS_DMPY feature macro: ARC HS with arc_mpy_option above 6.  */
+#define TARGET_PLUS_DMPY  ((arc_mpy_option > 6) && TARGET_HS)
+/* PLUS_MACD feature macro: ARC HS with arc_mpy_option above 7.  */
+#define TARGET_PLUS_MACD  ((arc_mpy_option > 7) && TARGET_HS)
+/* PLUS_QMACW feature macro: ARC HS with arc_mpy_option above 8.  */
+#define TARGET_PLUS_QMACW ((arc_mpy_option > 8) && TARGET_HS)
 
 /* ARC600 and ARC601 feature macro.  */
 #define TARGET_ARC600_FAMILY (TARGET_ARC600 || TARGET_ARC601)
diff --git a/gcc/config/arc/builtins.def b/gcc/config/arc/builtins.def
index 19be1d218520..8c71d30a459a 100644
--- a/gcc/config/arc/builtins.def
+++ b/gcc/config/arc/builtins.def
@@ -193,3 +193,30 @@ DEF_BUILTIN (VINTI, 1, void_ftype_int, vinti_insn, TARGET_SIMD_SET)
 
 /* END SIMD marker.  */
 DEF_BUILTIN (SIMD_END, 0, void_ftype_void, nothing, 0)
+
+/* ARCv2 SIMD instructions that use/clobber the accumulator reg.  */
+DEF_BUILTIN (QMACH,      2, long_ftype_v4hi_v4hi,   qmach,      TARGET_PLUS_QMACW)
+DEF_BUILTIN (QMACHU,     2, long_ftype_v4hi_v4hi,   qmachu,     TARGET_PLUS_QMACW)
+DEF_BUILTIN (QMPYH,      2, long_ftype_v4hi_v4hi,   qmpyh,      TARGET_PLUS_QMACW)
+DEF_BUILTIN (QMPYHU,     2, long_ftype_v4hi_v4hi,   qmpyhu,     TARGET_PLUS_QMACW)
+
+DEF_BUILTIN (DMACH,      2, int_ftype_v2hi_v2hi,    dmach,      TARGET_PLUS_DMPY)
+DEF_BUILTIN (DMACHU,     2, int_ftype_v2hi_v2hi,    dmachu,     TARGET_PLUS_DMPY)
+DEF_BUILTIN (DMPYH,      2, int_ftype_v2hi_v2hi,    dmpyh,      TARGET_PLUS_DMPY)
+DEF_BUILTIN (DMPYHU,     2, int_ftype_v2hi_v2hi,    dmpyhu,     TARGET_PLUS_DMPY)
+
+DEF_BUILTIN (DMACWH,     2, long_ftype_v2si_v2hi,   dmacwh,     TARGET_PLUS_QMACW)
+DEF_BUILTIN (DMACWHU,    2, long_ftype_v2si_v2hi,   dmacwhu,    TARGET_PLUS_QMACW)
+
+DEF_BUILTIN (VMAC2H,     2, v2si_ftype_v2hi_v2hi,   vmac2h,     TARGET_PLUS_MACD)
+DEF_BUILTIN (VMAC2HU,    2, v2si_ftype_v2hi_v2hi,   vmac2hu,    TARGET_PLUS_MACD)
+DEF_BUILTIN (VMPY2H,     2, v2si_ftype_v2hi_v2hi,   vmpy2h,     TARGET_PLUS_MACD)
+DEF_BUILTIN (VMPY2HU,    2, v2si_ftype_v2hi_v2hi,   vmpy2hu,    TARGET_PLUS_MACD)
+
+/* Combined add/sub HS SIMD instructions.  */
+DEF_BUILTIN (VADDSUB2H,  2, v2hi_ftype_v2hi_v2hi,   addsubv2hi3, TARGET_PLUS_DMPY)
+DEF_BUILTIN (VSUBADD2H,  2, v2hi_ftype_v2hi_v2hi,   subaddv2hi3, TARGET_PLUS_DMPY)
+DEF_BUILTIN (VADDSUB,    2, v2si_ftype_v2si_v2si,   addsubv2si3, TARGET_PLUS_QMACW)
+DEF_BUILTIN (VSUBADD,    2, v2si_ftype_v2si_v2si,   subaddv2si3, TARGET_PLUS_QMACW)
+DEF_BUILTIN (VADDSUB4H,  2, v4hi_ftype_v4hi_v4hi,   addsubv4hi3, TARGET_PLUS_QMACW)
+DEF_BUILTIN (VSUBADD4H,  2, v4hi_ftype_v4hi_v4hi,   subaddv4hi3, TARGET_PLUS_QMACW)
diff --git a/gcc/config/arc/simdext.md b/gcc/config/arc/simdext.md
index 9fd9d62e0483..51869e367726 100644
--- a/gcc/config/arc/simdext.md
+++ b/gcc/config/arc/simdext.md
@@ -1288,3 +1288,574 @@
   [(set_attr "type" "simd_vcontrol")
    (set_attr "length" "4")
    (set_attr "cond" "nocond")])
+
+;; New ARCv2 SIMD extensions
+
+;; 64-bit vectors of halfwords and words.
+(define_mode_iterator VWH [V4HI V2SI])
+
+;; Two-element vectors.
+(define_mode_iterator VDV [V2HI V2SI])
+(define_mode_attr V_addsub [(V2HI "HI") (V2SI "SI")])
+(define_mode_attr V_addsub_suffix [(V2HI "2h") (V2SI "")])
+
+;; All supported vector modes.
+(define_mode_iterator VCT [V2HI V4HI V2SI])
+(define_mode_attr V_suffix [(V2HI "2h") (V4HI "4h") (V2SI "2")])
+
+;; Widening operations.
+(define_code_iterator SE [sign_extend zero_extend])
+(define_code_attr V_US [(sign_extend "s") (zero_extend "u")])
+(define_code_attr V_US_suffix [(sign_extend "") (zero_extend "u")])
+
+
+;; Move patterns
+(define_expand "movv2hi"
+  [(set (match_operand:V2HI 0 "move_dest_operand" "")
+	(match_operand:V2HI 1 "general_operand" ""))]
+  ""
+  "{
+    if (prepare_move_operands (operands, V2HImode))
+         DONE;
+   }")
+
+(define_insn_and_split "*movv2hi_insn"
+  [(set (match_operand:V2HI 0 "nonimmediate_operand" "=r,r,r,m")
+	(match_operand:V2HI 1 "general_operand"       "i,r,m,r"))]
+  "(register_operand (operands[0], V2HImode)
+    || register_operand (operands[1], V2HImode))"
+  "@
+   #
+   mov%? %0, %1
+   ld%U1%V1 %0,%1
+   st%U0%V0 %1,%0"
+  "reload_completed && GET_CODE (operands[1]) == CONST_VECTOR"
+  [(set (match_dup 0) (match_dup 2))]
+  {
+   HOST_WIDE_INT intval = UINTVAL (XVECEXP (operands[1], 0, 1)) << 16;
+   intval |= INTVAL (XVECEXP (operands[1], 0, 0)) & 0xFFFF;
+   /* UINTVAL above avoids left-shifting a negative element.  Move as SImode.  */
+   operands[0] = gen_rtx_REG (SImode, REGNO (operands[0]));
+   operands[2] = GEN_INT (trunc_int_for_mode (intval, SImode));
+  }
+  [(set_attr "type" "move,move,load,store")
+   (set_attr "predicable" "yes,yes,no,no")
+   (set_attr "iscompact"  "false,false,false,false")
+   ])
+
+(define_expand "movmisalignv2hi"
+ [(set (match_operand:V2HI 0 "general_operand" "")
+       (match_operand:V2HI 1 "general_operand" ""))]
+ ""
+{
+ if (!register_operand (operands[0], V2HImode)
+      && !register_operand (operands[1], V2HImode))
+    operands[1] = force_reg (V2HImode, operands[1]);
+})
+
+(define_expand "mov<mode>"
+  [(set (match_operand:VWH 0 "move_dest_operand" "")
+	(match_operand:VWH 1 "general_operand" ""))]
+  ""
+  "{
+    if (GET_CODE (operands[0]) == MEM)
+     operands[1] = force_reg (<MODE>mode, operands[1]);
+   }")
+
+(define_insn_and_split "*mov<mode>_insn"
+  [(set (match_operand:VWH 0 "move_dest_operand" "=r,r,r,m")
+	(match_operand:VWH 1 "general_operand"    "i,r,m,r"))]
+  "TARGET_PLUS_QMACW
+   && (register_operand (operands[0], <MODE>mode)
+       || register_operand (operands[1], <MODE>mode))"
+  "*
+{
+  switch (which_alternative)
+    {
+     default:
+       return \"#\";
+
+     case 1:
+       return \"vadd2 %0, %1, 0\";
+
+     case 2:
+       if (TARGET_LL64)
+         return \"ldd%U1%V1 %0,%1\";
+       return \"#\";
+
+     case 3:
+       if (TARGET_LL64)
+	   return \"std%U0%V0 %1,%0\";
+	 return \"#\";
+    }
+}"
+  "reload_completed"
+  [(const_int 0)]
+  {
+   arc_split_move (operands);
+   DONE;
+  }
+  [(set_attr "type" "move,move,load,store")
+   (set_attr "predicable" "yes,no,no,no")
+   (set_attr "iscompact"  "false,false,false,false")
+   ])
+
+(define_expand "movmisalign<mode>"
+ [(set (match_operand:VWH 0 "general_operand" "")
+       (match_operand:VWH 1 "general_operand" ""))]
+ ""
+{
+ if (!register_operand (operands[0], <MODE>mode)
+      && !register_operand (operands[1], <MODE>mode))
+    operands[1] = force_reg (<MODE>mode, operands[1]);
+})
+
+(define_insn "bswapv2hi2" ; NOTE(review): confirm swape/swap semantics against the ARCv2 PRM
+  [(set (match_operand:V2HI 0 "register_operand" "=r,r")
+        (bswap:V2HI (match_operand:V2HI 1 "nonmemory_operand" "r,i")))]
+  "TARGET_V2 && TARGET_SWAP"
+  "swape %0, %1\;swap %0, %0" ; swape reverses all four bytes; swap puts the lanes back
+  [(set_attr "length" "8,12")
+   (set_attr "type" "two_cycle_core")])
+
+;; Simple arithmetic insns
+(define_insn "add<mode>3"
+  [(set (match_operand:VCT 0 "register_operand"          "=r,r")
+	(plus:VCT (match_operand:VCT 1 "register_operand" "0,r")
+		  (match_operand:VCT 2 "register_operand" "r,r")))]
+  "TARGET_PLUS_DMPY"
+  "vadd<V_suffix>%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "sub<mode>3"
+  [(set (match_operand:VCT 0 "register_operand"           "=r,r")
+	(minus:VCT (match_operand:VCT 1 "register_operand" "0,r")
+		   (match_operand:VCT 2 "register_operand" "r,r")))]
+  "TARGET_PLUS_DMPY"
+  "vsub<V_suffix>%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+;; Combined arithmetic ops
+(define_insn "addsub<mode>3"
+  [(set (match_operand:VDV 0 "register_operand" "=r,r")
+	(vec_concat:VDV
+	 (plus:<V_addsub> (vec_select:<V_addsub> (match_operand:VDV 1 "register_operand" "0,r")
+						 (parallel [(const_int 0)]))
+			  (vec_select:<V_addsub> (match_operand:VDV 2 "register_operand" "r,r")
+						 (parallel [(const_int 0)])))
+	 (minus:<V_addsub> (vec_select:<V_addsub> (match_dup 1) (parallel [(const_int 1)]))
+			   (vec_select:<V_addsub> (match_dup 2) (parallel [(const_int 1)])))))]
+  "TARGET_PLUS_DMPY"
+  "vaddsub<V_addsub_suffix>%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "subadd<mode>3"
+  [(set (match_operand:VDV 0 "register_operand" "=r,r")
+	(vec_concat:VDV
+	 (minus:<V_addsub> (vec_select:<V_addsub> (match_operand:VDV 1 "register_operand" "0,r")
+						  (parallel [(const_int 0)]))
+			   (vec_select:<V_addsub> (match_operand:VDV 2 "register_operand" "r,r")
+						  (parallel [(const_int 0)])))
+	 (plus:<V_addsub> (vec_select:<V_addsub> (match_dup 1) (parallel [(const_int 1)]))
+			  (vec_select:<V_addsub> (match_dup 2) (parallel [(const_int 1)])))))]
+  "TARGET_PLUS_DMPY"
+  "vsubadd<V_addsub_suffix>%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "addsubv4hi3"
+  [(set (match_operand:V4HI 0 "even_register_operand" "=r,r")
+	(vec_concat:V4HI
+	 (vec_concat:V2HI
+	  (plus:HI (vec_select:HI (match_operand:V4HI 1 "even_register_operand" "0,r")
+				  (parallel [(const_int 0)]))
+		   (vec_select:HI (match_operand:V4HI 2 "even_register_operand" "r,r")
+				  (parallel [(const_int 0)])))
+	  (minus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))
+		    (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))))
+	 (vec_concat:V2HI
+	  (plus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+		   (vec_select:HI (match_dup 2) (parallel [(const_int 2)])))
+	  (minus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))
+		    (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
+	 ))]
+  "TARGET_PLUS_QMACW"
+  "vaddsub4h%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "subaddv4hi3"
+  [(set (match_operand:V4HI 0 "even_register_operand" "=r,r")
+	(vec_concat:V4HI
+	 (vec_concat:V2HI
+	  (minus:HI (vec_select:HI (match_operand:V4HI 1 "even_register_operand" "0,r")
+				   (parallel [(const_int 0)]))
+		    (vec_select:HI (match_operand:V4HI 2 "even_register_operand" "r,r")
+				  (parallel [(const_int 0)])))
+	  (plus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 1)]))
+		   (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))))
+	 (vec_concat:V2HI
+	  (minus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
+		    (vec_select:HI (match_dup 2) (parallel [(const_int 2)])))
+	  (plus:HI (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))
+		   (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
+	 ))]
+  "TARGET_PLUS_QMACW"
+  "vsubadd4h%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+;; Multiplication
+(define_insn "dmpyh<V_US_suffix>"
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+	(plus:SI
+	 (mult:SI
+	  (SE:SI
+	   (vec_select:HI (match_operand:V2HI 1 "register_operand" "0,r")
+			  (parallel [(const_int 0)])))
+	  (SE:SI
+	   (vec_select:HI (match_operand:V2HI 2 "register_operand" "r,r")
+			  (parallel [(const_int 0)]))))
+	 (mult:SI
+	  (SE:SI (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+	  (SE:SI (vec_select:HI (match_dup 2) (parallel [(const_int 1)]))))))
+   (set (reg:DI ARCV2_ACC)
+	(zero_extend:DI
+	 (plus:SI
+	  (mult:SI
+	   (SE:SI (vec_select:HI (match_dup 1) (parallel [(const_int 0)])))
+	   (SE:SI (vec_select:HI (match_dup 2) (parallel [(const_int 0)]))))
+	  (mult:SI
+	   (SE:SI (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
+	   (SE:SI (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))))))]
+  "TARGET_PLUS_DMPY"
+  "dmpy<V_US_suffix>%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+;; We could use dmac here as well.  Which version performs better
+;; remains to be investigated.
+(define_expand "sdot_prodv2hi"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")
+   (match_operand:SI 3 "register_operand" "")]
+  "TARGET_PLUS_DMPY"
+{
+ rtx t = gen_reg_rtx (SImode);
+ emit_insn (gen_dmpyh (t, operands[1], operands[2]));
+ emit_insn (gen_addsi3 (operands[0], operands[3], t));
+ DONE;
+})
+
+(define_expand "udot_prodv2hi"
+  [(match_operand:SI 0 "register_operand" "")
+   (match_operand:V2HI 1 "register_operand" "")
+   (match_operand:V2HI 2 "register_operand" "")
+   (match_operand:SI 3 "register_operand" "")]
+  "TARGET_PLUS_DMPY"
+{
+ rtx t = gen_reg_rtx (SImode);
+ emit_insn (gen_dmpyhu (t, operands[1], operands[2]));
+ emit_insn (gen_addsi3 (operands[0], operands[3], t));
+ DONE;
+})
+
+(define_insn "arc_vec_<V_US>mult_lo_v4hi"
+ [(set (match_operand:V2SI 0 "even_register_operand"                     "=r,r")
+       (mult:V2SI (SE:V2SI (vec_select:V2HI
+			    (match_operand:V4HI 1 "even_register_operand" "0,r")
+			    (parallel [(const_int 0) (const_int 1)])))
+		  (SE:V2SI (vec_select:V2HI
+			    (match_operand:V4HI 2 "even_register_operand" "r,r")
+			    (parallel [(const_int 0) (const_int 1)])))))
+  (set (reg:V2SI ARCV2_ACC)
+       (mult:V2SI (SE:V2SI (vec_select:V2HI (match_dup 1)
+					    (parallel [(const_int 0) (const_int 1)])))
+		  (SE:V2SI (vec_select:V2HI (match_dup 2)
+					    (parallel [(const_int 0) (const_int 1)])))))
+  ]
+  "TARGET_PLUS_MACD"
+  "vmpy2h<V_US_suffix>%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "arc_vec_<V_US>multacc_lo_v4hi"
+  [(set (reg:V2SI ARCV2_ACC)
+	(mult:V2SI (SE:V2SI (vec_select:V2HI
+			     (match_operand:V4HI 0 "even_register_operand" "r")
+			     (parallel [(const_int 0) (const_int 1)])))
+		   (SE:V2SI (vec_select:V2HI
+			     (match_operand:V4HI 1 "even_register_operand" "r")
+			     (parallel [(const_int 0) (const_int 1)])))))
+  ]
+  "TARGET_PLUS_MACD"
+  "vmpy2h<V_US_suffix>%? 0, %0, %1"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "no")
+   (set_attr "cond" "nocond")])
+
+(define_expand "vec_widen_<V_US>mult_lo_v4hi"
+ [(set (match_operand:V2SI 0 "even_register_operand"                 "")
+       (mult:V2SI (SE:V2SI (vec_select:V2HI
+			    (match_operand:V4HI 1 "even_register_operand" "")
+			    (parallel [(const_int 0) (const_int 1)])))
+		  (SE:V2SI (vec_select:V2HI
+			    (match_operand:V4HI 2 "even_register_operand" "")
+			    (parallel [(const_int 0) (const_int 1)])))))]
+  "TARGET_PLUS_QMACW"
+  {
+     emit_insn (gen_arc_vec_<V_US>mult_lo_v4hi (operands[0],
+						operands[1],
+						operands[2]));
+     DONE;
+  }
+)
+
+(define_insn "arc_vec_<V_US>mult_hi_v4hi"
+ [(set (match_operand:V2SI 0 "even_register_operand"                     "=r,r")
+       (mult:V2SI (SE:V2SI (vec_select:V2HI
+			    (match_operand:V4HI 1 "even_register_operand" "0,r")
+			    (parallel [(const_int 2) (const_int 3)])))
+		  (SE:V2SI (vec_select:V2HI
+			    (match_operand:V4HI 2 "even_register_operand" "r,r")
+			    (parallel [(const_int 2) (const_int 3)])))))
+  (set (reg:V2SI ARCV2_ACC)
+       (mult:V2SI (SE:V2SI (vec_select:V2HI (match_dup 1)
+					    (parallel [(const_int 2) (const_int 3)])))
+		  (SE:V2SI (vec_select:V2HI (match_dup 2)
+					    (parallel [(const_int 2) (const_int 3)])))))
+  ]
+  "TARGET_PLUS_QMACW"
+  "vmpy2h<V_US_suffix>%? %0, %R1, %R2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_expand "vec_widen_<V_US>mult_hi_v4hi"
+ [(set (match_operand:V2SI 0 "even_register_operand"                               "")
+       (mult:V2SI (SE:V2SI (vec_select:V2HI
+				     (match_operand:V4HI 1 "even_register_operand" "")
+				     (parallel [(const_int 2) (const_int 3)])))
+		  (SE:V2SI (vec_select:V2HI
+				     (match_operand:V4HI 2 "even_register_operand" "")
+				     (parallel [(const_int 2) (const_int 3)])))))]
+  "TARGET_PLUS_QMACW" ; must not be weaker than the condition of the insn emitted below
+  {
+     emit_insn (gen_arc_vec_<V_US>mult_hi_v4hi (operands[0],
+						operands[1],
+						operands[2]));
+     DONE;
+  }
+)
+
+(define_insn "arc_vec_<V_US>mac_hi_v4hi"
+ [(set (match_operand:V2SI 0 "even_register_operand"                     "=r,r")
+       (plus:V2SI
+	(reg:V2SI ARCV2_ACC)
+	(mult:V2SI (SE:V2SI (vec_select:V2HI
+			     (match_operand:V4HI 1 "even_register_operand" "0,r")
+			     (parallel [(const_int 2) (const_int 3)])))
+		   (SE:V2SI (vec_select:V2HI
+			     (match_operand:V4HI 2 "even_register_operand" "r,r")
+			     (parallel [(const_int 2) (const_int 3)]))))))
+  (set (reg:V2SI ARCV2_ACC)
+       (plus:V2SI
+	(reg:V2SI ARCV2_ACC)
+	(mult:V2SI (SE:V2SI (vec_select:V2HI (match_dup 1)
+					     (parallel [(const_int 2) (const_int 3)])))
+		   (SE:V2SI (vec_select:V2HI (match_dup 2)
+					     (parallel [(const_int 2) (const_int 3)]))))))
+  ]
+  "TARGET_PLUS_MACD"
+  "vmac2h<V_US_suffix>%? %0, %R1, %R2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+;; Builtins
+(define_insn "dmach"
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+	(unspec:SI [(match_operand:V2HI 1 "register_operand" "0,r")
+		    (match_operand:V2HI 2 "register_operand" "r,r")
+		    (reg:DI ARCV2_ACC)]
+		   UNSPEC_ARC_DMACH))
+   (clobber (reg:DI ARCV2_ACC))]
+  "TARGET_PLUS_DMPY"
+  "dmach%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "dmachu"
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+	(unspec:SI [(match_operand:V2HI 1 "register_operand" "0,r")
+		    (match_operand:V2HI 2 "register_operand" "r,r")
+		    (reg:DI ARCV2_ACC)]
+		   UNSPEC_ARC_DMACHU))
+   (clobber (reg:DI ARCV2_ACC))]
+  "TARGET_PLUS_DMPY"
+  "dmachu%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "dmacwh"
+  [(set (match_operand:DI 0 "even_register_operand" "=r,r")
+	(unspec:DI [(match_operand:V2SI 1 "even_register_operand" "0,r")
+		    (match_operand:V2HI 2 "register_operand"      "r,r")
+		    (reg:DI ARCV2_ACC)]
+		   UNSPEC_ARC_DMACWH))
+   (clobber (reg:DI ARCV2_ACC))]
+  "TARGET_PLUS_QMACW"
+  "dmacwh%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "dmacwhu" ; DI result is restricted to an even register, matching dmacwh
+  [(set (match_operand:DI 0 "even_register_operand" "=r,r")
+	(unspec:DI [(match_operand:V2SI 1 "even_register_operand" "0,r")
+		    (match_operand:V2HI 2 "register_operand"      "r,r")
+		    (reg:DI ARCV2_ACC)]
+		   UNSPEC_ARC_DMACWHU))
+   (clobber (reg:DI ARCV2_ACC))]
+  "TARGET_PLUS_QMACW"
+  "dmacwhu%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "vmac2h"
+  [(set (match_operand:V2SI 0 "even_register_operand" "=r,r")
+	(unspec:V2SI [(match_operand:V2HI 1 "register_operand" "0,r")
+		      (match_operand:V2HI 2 "register_operand" "r,r")
+		      (reg:DI ARCV2_ACC)]
+		     UNSPEC_ARC_VMAC2H))
+   (clobber (reg:DI ARCV2_ACC))]
+  "TARGET_PLUS_MACD"
+  "vmac2h%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "vmac2hu"
+  [(set (match_operand:V2SI 0 "even_register_operand" "=r,r")
+	(unspec:V2SI [(match_operand:V2HI 1 "register_operand" "0,r")
+		      (match_operand:V2HI 2 "register_operand" "r,r")
+		      (reg:DI ARCV2_ACC)]
+		   UNSPEC_ARC_VMAC2HU))
+   (clobber (reg:DI ARCV2_ACC))]
+  "TARGET_PLUS_MACD"
+  "vmac2hu%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "vmpy2h"
+  [(set (match_operand:V2SI 0 "even_register_operand" "=r,r")
+	(unspec:V2SI [(match_operand:V2HI 1 "register_operand" "0,r")
+		      (match_operand:V2HI 2 "register_operand" "r,r")]
+		     UNSPEC_ARC_VMPY2H))
+   (clobber (reg:DI ARCV2_ACC))]
+  "TARGET_PLUS_MACD"
+  "vmpy2h%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "vmpy2hu"
+  [(set (match_operand:V2SI 0 "even_register_operand" "=r,r")
+	(unspec:V2SI [(match_operand:V2HI 1 "register_operand" "0,r")
+		      (match_operand:V2HI 2 "register_operand" "r,r")]
+		     UNSPEC_ARC_VMPY2HU))
+   (clobber (reg:DI ARCV2_ACC))]
+  "TARGET_PLUS_MACD"
+  "vmpy2hu%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "qmach"
+  [(set (match_operand:DI 0 "even_register_operand" "=r,r")
+	(unspec:DI [(match_operand:V4HI 1 "even_register_operand" "0,r")
+		    (match_operand:V4HI 2 "even_register_operand" "r,r")
+		    (reg:DI ARCV2_ACC)]
+		     UNSPEC_ARC_QMACH))
+   (clobber (reg:DI ARCV2_ACC))]
+  "TARGET_PLUS_QMACW"
+  "qmach%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "qmachu"
+  [(set (match_operand:DI 0 "even_register_operand" "=r,r")
+	(unspec:DI [(match_operand:V4HI 1 "even_register_operand" "0,r")
+		    (match_operand:V4HI 2 "even_register_operand" "r,r")
+		    (reg:DI ARCV2_ACC)]
+		   UNSPEC_ARC_QMACHU))
+   (clobber (reg:DI ARCV2_ACC))]
+  "TARGET_PLUS_QMACW"
+  "qmachu%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "qmpyh"
+  [(set (match_operand:DI 0 "even_register_operand" "=r,r")
+	(unspec:DI [(match_operand:V4HI 1 "even_register_operand" "0,r")
+		    (match_operand:V4HI 2 "even_register_operand" "r,r")]
+		     UNSPEC_ARC_QMPYH))
+   (clobber (reg:DI ARCV2_ACC))]
+  "TARGET_PLUS_QMACW"
+  "qmpyh%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
+
+(define_insn "qmpyhu"
+  [(set (match_operand:DI 0 "even_register_operand" "=r,r")
+	(unspec:DI [(match_operand:V4HI 1 "even_register_operand" "0,r")
+		    (match_operand:V4HI 2 "even_register_operand" "r,r")]
+		   UNSPEC_ARC_QMPYHU))
+   (clobber (reg:DI ARCV2_ACC))]
+  "TARGET_PLUS_QMACW"
+  "qmpyhu%? %0, %1, %2"
+  [(set_attr "length" "4")
+   (set_attr "type" "multi")
+   (set_attr "predicable" "yes,no")
+   (set_attr "cond" "canuse,nocond")])
diff --git a/gcc/testsuite/gcc.target/arc/builtin_simdarc.c b/gcc/testsuite/gcc.target/arc/builtin_simdarc.c
new file mode 100644
index 000000000000..68aae40ca58e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arc/builtin_simdarc.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-mcpu=archs -O2 -Werror-implicit-function-declaration -mmpy-option=9" } */
+
+#define STEST(name, rettype, op1type, op2type)	\
+  rettype test_ ## name				\
+  (op1type a, op2type b)			\
+  {						\
+    return __builtin_arc_ ## name (a, b);	\
+  }
+
+typedef short v2hi __attribute__ ((vector_size (4)));
+typedef short v4hi __attribute__ ((vector_size (8)));
+typedef int   v2si __attribute__ ((vector_size (8)));
+
+STEST (qmach,  long long, v4hi, v4hi)
+STEST (qmachu, long long, v4hi, v4hi)
+STEST (qmpyh,  long long, v4hi, v4hi)
+STEST (qmpyhu, long long, v4hi, v4hi)
+
+STEST (dmach,  int, v2hi, v2hi)
+STEST (dmachu, int, v2hi, v2hi)
+STEST (dmpyh,  int, v2hi, v2hi)
+STEST (dmpyhu, int, v2hi, v2hi)
+
+STEST (dmacwh,  long long, v2si, v2hi)
+STEST (dmacwhu, long long, v2si, v2hi)
+
+STEST (vmac2h,  v2si, v2hi, v2hi)
+STEST (vmac2hu, v2si, v2hi, v2hi)
+STEST (vmpy2h,  v2si, v2hi, v2hi)
+STEST (vmpy2hu, v2si, v2hi, v2hi)
+
+STEST (vaddsub2h, v2hi, v2hi, v2hi)
+STEST (vsubadd2h, v2hi, v2hi, v2hi)
+STEST (vaddsub,   v2si, v2si, v2si)
+STEST (vsubadd,   v2si, v2si, v2si)
+STEST (vaddsub4h, v4hi, v4hi, v4hi)
+STEST (vsubadd4h, v4hi, v4hi, v4hi)
-- 
GitLab