From 8efc8980df50d002cc55d53f5fbfe4f04aeeaaaf Mon Sep 17 00:00:00 2001
From: Richard Sandiford <rsandifo@redhat.com>
Date: Fri, 19 Mar 2004 09:59:00 +0000
Subject: [PATCH] expmed.c (choose_mult_variant, [...]): New, split from...

	* expmed.c (choose_mult_variant, expand_mult_const): New, split from...
	(expand_mult): ...here.
	(extract_high_half): New, split out from expand_mult_highpart.
	(expand_mult_highpart_optab): Likewise.  Don't clobber target
	prematurely.
	(expand_mult_highpart): Evaluate the cost of a shift/add sequence,
	then see if any of the specialized optabs are cheaper.

From-SVN: r79673
---
 gcc/ChangeLog |   9 +
 gcc/expmed.c  | 559 ++++++++++++++++++++++++++------------------------
 2 files changed, 304 insertions(+), 264 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 6ce488300203..c8d87b7df699 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@
+2004-03-19  Richard Sandiford  <rsandifo@redhat.com>
+
+	* expmed.c (choose_mult_variant, expand_mult_const): New, split from...
+	(expand_mult): ...here.
+	(extract_high_half): New, split out from expand_mult_highpart.
+	(expand_mult_highpart_optab): Likewise.  Don't clobber target prematurely.
+	(expand_mult_highpart): Evaluate the cost of a shift/add sequence,
+	then see if any of the specialized optabs are cheaper.
+
 2004-03-18  Ian Lance Taylor  <ian@wasabisystems.com>
 
 	* mklibgcc.in: Remove obsolete MAYBE_USE_COLLECT2.
diff --git a/gcc/expmed.c b/gcc/expmed.c
index 8586f403e92e..da0a9fe7f0df 100644
--- a/gcc/expmed.c
+++ b/gcc/expmed.c
@@ -2149,11 +2149,24 @@ struct algorithm
   char log[MAX_BITS_PER_WORD];
 };
 
+/* Indicates the type of fixup needed after a constant multiplication.
+   BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
+   the result should be negated, and ADD_VARIANT means that the
+   multiplicand should be added to the result.  */
+enum mult_variant {basic_variant, negate_variant, add_variant};
+
 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT, int);
+static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
+				 struct algorithm *, enum mult_variant *);
+static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
+			      const struct algorithm *, enum mult_variant);
 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
 						 int, unsigned HOST_WIDE_INT *,
 						 int *, int *);
 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
+static rtx extract_high_half (enum machine_mode, rtx);
+static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
+				       int, int);
 /* Compute and return the best algorithm for multiplying by T.
    The algorithm must cost less than cost_limit
    If retval.cost >= COST_LIMIT, no algorithm was found and all
@@ -2396,6 +2409,198 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
 	  alg_out->ops * sizeof *alg_out->log);
 }
 
+/* Find the cheapest way of multiplying a value of mode MODE by VAL.
+   Try three variations:
+
+       - a shift/add sequence based on VAL itself
+       - a shift/add sequence based on -VAL, followed by a negation
+       - a shift/add sequence based on VAL - 1, followed by an addition.
+
+   Return true if the cheapest of these is better than register
+   multiplication, describing the algorithm in *ALG and final
+   fixup in *VARIANT.  */
+
+static bool
+choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
+		     struct algorithm *alg, enum mult_variant *variant)
+{
+  int mult_cost;
+  struct algorithm alg2;
+  rtx reg;
+
+  reg = gen_rtx_REG (mode, FIRST_PSEUDO_REGISTER);
+  mult_cost = rtx_cost (gen_rtx_MULT (mode, reg, GEN_INT (val)), SET);
+  mult_cost = MIN (12 * add_cost, mult_cost);
+
+  *variant = basic_variant;
+  synth_mult (alg, val, mult_cost);
+
+  /* This works only if the negated value actually fits in an
+     `unsigned int'.  */
+  if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
+    {
+      synth_mult (&alg2, -val, MIN (alg->cost, mult_cost) - negate_cost);
+      alg2.cost += negate_cost;
+      if (alg2.cost < alg->cost)
+	*alg = alg2, *variant = negate_variant;
+    }
+
+  /* This proves very useful for division-by-constant.  */
+  synth_mult (&alg2, val - 1, MIN (alg->cost, mult_cost) - add_cost);
+  alg2.cost += add_cost;
+  if (alg2.cost < alg->cost)
+    *alg = alg2, *variant = add_variant;
+
+  return alg->cost < mult_cost;
+}
+
+/* A subroutine of expand_mult, used for constant multiplications.
+   Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
+   convenient.  Use the shift/add sequence described by ALG and apply
+   the final fixup specified by VARIANT.  */
+
+static rtx
+expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
+		   rtx target, const struct algorithm *alg,
+		   enum mult_variant variant)
+{
+  HOST_WIDE_INT val_so_far;
+  rtx insn, accum, tem;
+  int opno;
+  enum machine_mode nmode;
+
+  /* op0 must be register to make mult_cost match the precomputed
+     shiftadd_cost array.  */
+  op0 = protect_from_queue (op0, 0);
+
+  /* Avoid referencing memory over and over.
+     For speed, but also for correctness when mem is volatile.  */
+  if (GET_CODE (op0) == MEM)
+    op0 = force_reg (mode, op0);
+
+  /* ACCUM starts out either as OP0 or as a zero, depending on
+     the first operation.  */
+
+  if (alg->op[0] == alg_zero)
+    {
+      accum = copy_to_mode_reg (mode, const0_rtx);
+      val_so_far = 0;
+    }
+  else if (alg->op[0] == alg_m)
+    {
+      accum = copy_to_mode_reg (mode, op0);
+      val_so_far = 1;
+    }
+  else
+    abort ();
+
+  for (opno = 1; opno < alg->ops; opno++)
+    {
+      int log = alg->log[opno];
+      int preserve = preserve_subexpressions_p ();
+      rtx shift_subtarget = preserve ? 0 : accum;
+      rtx add_target
+	= (opno == alg->ops - 1 && target != 0 && variant != add_variant
+	   && ! preserve)
+	  ? target : 0;
+      rtx accum_target = preserve ? 0 : accum;
+
+      switch (alg->op[opno])
+	{
+	case alg_shift:
+	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
+				build_int_2 (log, 0), NULL_RTX, 0);
+	  val_so_far <<= log;
+	  break;
+
+	case alg_add_t_m2:
+	  tem = expand_shift (LSHIFT_EXPR, mode, op0,
+			      build_int_2 (log, 0), NULL_RTX, 0);
+	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
+				 add_target ? add_target : accum_target);
+	  val_so_far += (HOST_WIDE_INT) 1 << log;
+	  break;
+
+	case alg_sub_t_m2:
+	  tem = expand_shift (LSHIFT_EXPR, mode, op0,
+			      build_int_2 (log, 0), NULL_RTX, 0);
+	  accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
+				 add_target ? add_target : accum_target);
+	  val_so_far -= (HOST_WIDE_INT) 1 << log;
+	  break;
+
+	case alg_add_t2_m:
+	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
+				build_int_2 (log, 0), shift_subtarget,
+				0);
+	  accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
+				 add_target ? add_target : accum_target);
+	  val_so_far = (val_so_far << log) + 1;
+	  break;
+
+	case alg_sub_t2_m:
+	  accum = expand_shift (LSHIFT_EXPR, mode, accum,
+				build_int_2 (log, 0), shift_subtarget, 0);
+	  accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
+				 add_target ? add_target : accum_target);
+	  val_so_far = (val_so_far << log) - 1;
+	  break;
+
+	case alg_add_factor:
+	  tem = expand_shift (LSHIFT_EXPR, mode, accum,
+			      build_int_2 (log, 0), NULL_RTX, 0);
+	  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
+				 add_target ? add_target : accum_target);
+	  val_so_far += val_so_far << log;
+	  break;
+
+	case alg_sub_factor:
+	  tem = expand_shift (LSHIFT_EXPR, mode, accum,
+			      build_int_2 (log, 0), NULL_RTX, 0);
+	  accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
+				 (add_target ? add_target
+				  : preserve ? 0 : tem));
+	  val_so_far = (val_so_far << log) - val_so_far;
+	  break;
+
+	default:
+	  abort ();
+	}
+
+      /* Write a REG_EQUAL note on the last insn so that we can cse
+	 multiplication sequences.  Note that if ACCUM is a SUBREG,
+	 we've set the inner register and must properly indicate
+	 that.  */
+
+      tem = op0, nmode = mode;
+      if (GET_CODE (accum) == SUBREG)
+	{
+	  nmode = GET_MODE (SUBREG_REG (accum));
+	  tem = gen_lowpart (nmode, op0);
+	}
+
+      insn = get_last_insn ();
+      set_unique_reg_note (insn, REG_EQUAL,
+			   gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)));
+    }
+
+  if (variant == negate_variant)
+    {
+      val_so_far = -val_so_far;
+      accum = expand_unop (mode, neg_optab, accum, target, 0);
+    }
+  else if (variant == add_variant)
+    {
+      val_so_far = val_so_far + 1;
+      accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
+    }
+
+  if (val != val_so_far)
+    abort ();
+
+  return accum;
+}
+
 /* Perform a multiplication and return an rtx for the result.
    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
    TARGET is a suggestion for where to store the result (an rtx).
@@ -2409,6 +2614,8 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
 	     int unsignedp)
 {
   rtx const_op1 = op1;
+  enum mult_variant variant;
+  struct algorithm algorithm;
 
   /* synth_mult does an `unsigned int' multiply.  As long as the mode is
      less than or equal in size to `unsigned int' this doesn't matter.
@@ -2435,190 +2642,10 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
      that it seems better to use synth_mult always.  */
 
   if (const_op1 && GET_CODE (const_op1) == CONST_INT
-      && (unsignedp || ! flag_trapv))
-    {
-      struct algorithm alg;
-      struct algorithm alg2;
-      HOST_WIDE_INT val = INTVAL (op1);
-      HOST_WIDE_INT val_so_far;
-      rtx insn;
-      int mult_cost;
-      enum {basic_variant, negate_variant, add_variant} variant = basic_variant;
-
-      /* op0 must be register to make mult_cost match the precomputed
-         shiftadd_cost array.  */
-      op0 = force_reg (mode, op0);
-
-      /* Try to do the computation three ways: multiply by the negative of OP1
-	 and then negate, do the multiplication directly, or do multiplication
-	 by OP1 - 1.  */
-
-      mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
-      mult_cost = MIN (12 * add_cost, mult_cost);
-
-      synth_mult (&alg, val, mult_cost);
-
-      /* This works only if the inverted value actually fits in an
-	 `unsigned int' */
-      if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
-	{
-	  synth_mult (&alg2, - val,
-		      (alg.cost < mult_cost ? alg.cost : mult_cost) - negate_cost);
-	  if (alg2.cost + negate_cost < alg.cost)
-	    alg = alg2, variant = negate_variant;
-	}
-
-      /* This proves very useful for division-by-constant.  */
-      synth_mult (&alg2, val - 1,
-		  (alg.cost < mult_cost ? alg.cost : mult_cost) - add_cost);
-      if (alg2.cost + add_cost < alg.cost)
-	alg = alg2, variant = add_variant;
-
-      if (alg.cost < mult_cost)
-	{
-	  /* We found something cheaper than a multiply insn.  */
-	  int opno;
-	  rtx accum, tem;
-	  enum machine_mode nmode;
-
-	  op0 = protect_from_queue (op0, 0);
-
-	  /* Avoid referencing memory over and over.
-	     For speed, but also for correctness when mem is volatile.  */
-	  if (GET_CODE (op0) == MEM)
-	    op0 = force_reg (mode, op0);
-
-	  /* ACCUM starts out either as OP0 or as a zero, depending on
-	     the first operation.  */
-
-	  if (alg.op[0] == alg_zero)
-	    {
-	      accum = copy_to_mode_reg (mode, const0_rtx);
-	      val_so_far = 0;
-	    }
-	  else if (alg.op[0] == alg_m)
-	    {
-	      accum = copy_to_mode_reg (mode, op0);
-	      val_so_far = 1;
-	    }
-	  else
-	    abort ();
-
-	  for (opno = 1; opno < alg.ops; opno++)
-	    {
-	      int log = alg.log[opno];
-	      int preserve = preserve_subexpressions_p ();
-	      rtx shift_subtarget = preserve ? 0 : accum;
-	      rtx add_target
-		= (opno == alg.ops - 1 && target != 0 && variant != add_variant
-		   && ! preserve)
-		  ? target : 0;
-	      rtx accum_target = preserve ? 0 : accum;
-
-	      switch (alg.op[opno])
-		{
-		case alg_shift:
-		  accum = expand_shift (LSHIFT_EXPR, mode, accum,
-					build_int_2 (log, 0), NULL_RTX, 0);
-		  val_so_far <<= log;
-		  break;
-
-		case alg_add_t_m2:
-		  tem = expand_shift (LSHIFT_EXPR, mode, op0,
-				      build_int_2 (log, 0), NULL_RTX, 0);
-		  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
-					 add_target
-					 ? add_target : accum_target);
-		  val_so_far += (HOST_WIDE_INT) 1 << log;
-		  break;
-
-		case alg_sub_t_m2:
-		  tem = expand_shift (LSHIFT_EXPR, mode, op0,
-				      build_int_2 (log, 0), NULL_RTX, 0);
-		  accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
-					 add_target
-					 ? add_target : accum_target);
-		  val_so_far -= (HOST_WIDE_INT) 1 << log;
-		  break;
-
-		case alg_add_t2_m:
-		  accum = expand_shift (LSHIFT_EXPR, mode, accum,
-					build_int_2 (log, 0), shift_subtarget,
-					0);
-		  accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
-					 add_target
-					 ? add_target : accum_target);
-		  val_so_far = (val_so_far << log) + 1;
-		  break;
-
-		case alg_sub_t2_m:
-		  accum = expand_shift (LSHIFT_EXPR, mode, accum,
-					build_int_2 (log, 0), shift_subtarget,
-					0);
-		  accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
-					 add_target
-					 ? add_target : accum_target);
-		  val_so_far = (val_so_far << log) - 1;
-		  break;
-
-		case alg_add_factor:
-		  tem = expand_shift (LSHIFT_EXPR, mode, accum,
-				      build_int_2 (log, 0), NULL_RTX, 0);
-		  accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
-					 add_target
-					 ? add_target : accum_target);
-		  val_so_far += val_so_far << log;
-		  break;
-
-		case alg_sub_factor:
-		  tem = expand_shift (LSHIFT_EXPR, mode, accum,
-				      build_int_2 (log, 0), NULL_RTX, 0);
-		  accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
-					 (add_target ? add_target
-					  : preserve ? 0 : tem));
-		  val_so_far = (val_so_far << log) - val_so_far;
-		  break;
-
-		default:
-		  abort ();
-		}
-
-	      /* Write a REG_EQUAL note on the last insn so that we can cse
-		 multiplication sequences.  Note that if ACCUM is a SUBREG,
-		 we've set the inner register and must properly indicate
-		 that.  */
-
-	      tem = op0, nmode = mode;
-	      if (GET_CODE (accum) == SUBREG)
-		{
-		  nmode = GET_MODE (SUBREG_REG (accum));
-		  tem = gen_lowpart (nmode, op0);
-		}
-
-	      insn = get_last_insn ();
-	      set_unique_reg_note (insn,
-				   REG_EQUAL,
-				   gen_rtx_MULT (nmode, tem,
-					         GEN_INT (val_so_far)));
-	    }
-
-	  if (variant == negate_variant)
-	    {
-	      val_so_far = - val_so_far;
-	      accum = expand_unop (mode, neg_optab, accum, target, 0);
-	    }
-	  else if (variant == add_variant)
-	    {
-	      val_so_far = val_so_far + 1;
-	      accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
-	    }
-
-	  if (val != val_so_far)
-	    abort ();
-
-	  return accum;
-	}
-    }
+      && (unsignedp || !flag_trapv)
+      && choose_mult_variant (mode, INTVAL (const_op1), &algorithm, &variant))
+    return expand_mult_const (mode, op0, INTVAL (const_op1), target,
+			      &algorithm, variant);
 
   if (GET_CODE (op0) == CONST_DOUBLE)
     {
@@ -2832,70 +2859,46 @@ expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
   return target;
 }
 
-/* Emit code to multiply OP0 and CNST1, putting the high half of the result
-   in TARGET if that is convenient, and return where the result is.  If the
-   operation can not be performed, 0 is returned.
+/* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */
 
-   MODE is the mode of operation and result.
+static rtx
+extract_high_half (enum machine_mode mode, rtx op)
+{
+  enum machine_mode wider_mode;
 
-   UNSIGNEDP nonzero means unsigned multiply.
+  if (mode == word_mode)
+    return gen_highpart (mode, op);
 
-   MAX_COST is the total allowed cost for the expanded RTL.  */
+  wider_mode = GET_MODE_WIDER_MODE (mode);
+  op = expand_shift (RSHIFT_EXPR, wider_mode, op,
+		     build_int_2 (GET_MODE_BITSIZE (mode), 0), 0, 1);
+  return convert_modes (mode, wider_mode, op, 0);
+}
 
-rtx
-expand_mult_highpart (enum machine_mode mode, rtx op0,
-		      unsigned HOST_WIDE_INT cnst1, rtx target,
-		      int unsignedp, int max_cost)
+/* Like expand_mult_highpart, but only consider using a multiplication
+   optab.  OP1 is an rtx for the constant operand.  */
+
+static rtx
+expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
+			    rtx target, int unsignedp, int max_cost)
 {
-  enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
-  optab mul_highpart_optab;
+  enum machine_mode wider_mode;
   optab moptab;
   rtx tem;
-  int size = GET_MODE_BITSIZE (mode);
-  rtx op1, wide_op1;
-
-  /* We can't support modes wider than HOST_BITS_PER_INT.  */
-  if (size > HOST_BITS_PER_WIDE_INT)
-    abort ();
-
-  op1 = gen_int_mode (cnst1, mode);
-
-  wide_op1
-    = immed_double_const (cnst1,
-			  (unsignedp
-			   ? (HOST_WIDE_INT) 0
-			   : -(cnst1 >> (HOST_BITS_PER_WIDE_INT - 1))),
-			  wider_mode);
-
-  /* expand_mult handles constant multiplication of word_mode
-     or narrower.  It does a poor job for large modes.  */
-  if (size < BITS_PER_WORD
-      && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
-    {
-      /* We have to do this, since expand_binop doesn't do conversion for
-	 multiply.  Maybe change expand_binop to handle widening multiply?  */
-      op0 = convert_to_mode (wider_mode, op0, unsignedp);
-
-      /* We know that this can't have signed overflow, so pretend this is
-         an unsigned multiply.  */
-      tem = expand_mult (wider_mode, op0, wide_op1, NULL_RTX, 0);
-      tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
-			  build_int_2 (size, 0), NULL_RTX, 1);
-      return convert_modes (mode, wider_mode, tem, unsignedp);
-    }
+  int size;
 
-  if (target == 0)
-    target = gen_reg_rtx (mode);
+  wider_mode = GET_MODE_WIDER_MODE (mode);
+  size = GET_MODE_BITSIZE (mode);
 
   /* Firstly, try using a multiplication insn that only generates the needed
      high part of the product, and in the sign flavor of unsignedp.  */
   if (mul_highpart_cost[(int) mode] < max_cost)
     {
-      mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
-      target = expand_binop (mode, mul_highpart_optab,
-			     op0, op1, target, unsignedp, OPTAB_DIRECT);
-      if (target)
-	return target;
+      moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
+      tem = expand_binop (mode, moptab, op0, op1, target,
+			  unsignedp, OPTAB_DIRECT);
+      if (tem)
+	return tem;
     }
 
   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
@@ -2904,13 +2907,13 @@ expand_mult_highpart (enum machine_mode mode, rtx op0,
       && (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost
 	  < max_cost))
     {
-      mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
-      target = expand_binop (mode, mul_highpart_optab,
-			     op0, op1, target, unsignedp, OPTAB_DIRECT);
-      if (target)
+      moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
+      tem = expand_binop (mode, moptab, op0, op1, target,
+			  unsignedp, OPTAB_DIRECT);
+      if (tem)
 	/* We used the wrong signedness.  Adjust the result.  */
-	return expand_mult_highpart_adjust (mode, target, op0,
-					    op1, target, unsignedp);
+	return expand_mult_highpart_adjust (mode, tem, op0, op1,
+					    tem, unsignedp);
     }
 
   /* Try widening multiplication.  */
@@ -2918,8 +2921,10 @@ expand_mult_highpart (enum machine_mode mode, rtx op0,
   if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
       && mul_widen_cost[(int) wider_mode] < max_cost)
     {
-      op1 = force_reg (mode, op1);
-      goto try;
+      tem = expand_binop (wider_mode, moptab, op0, op1, 0,
+			  unsignedp, OPTAB_WIDEN);
+      if (tem)
+	return extract_high_half (mode, tem);
     }
 
   /* Try widening the mode and perform a non-widening multiplication.  */
@@ -2928,8 +2933,10 @@ expand_mult_highpart (enum machine_mode mode, rtx op0,
       && size - 1 < BITS_PER_WORD
       && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
     {
-      op1 = wide_op1;
-      goto try;
+      tem = expand_binop (wider_mode, moptab, op0, op1, 0,
+			  unsignedp, OPTAB_WIDEN);
+      if (tem)
+	return extract_high_half (mode, tem);
     }
 
   /* Try widening multiplication of opposite signedness, and adjust.  */
@@ -2944,10 +2951,7 @@ expand_mult_highpart (enum machine_mode mode, rtx op0,
 			  NULL_RTX, ! unsignedp, OPTAB_WIDEN);
       if (tem != 0)
 	{
-	  /* Extract the high half of the just generated product.  */
-	  tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
-			      build_int_2 (size, 0), NULL_RTX, 1);
-	  tem = convert_modes (mode, wider_mode, tem, unsignedp);
+	  tem = extract_high_half (mode, tem);
 	  /* We used the wrong signedness.  Adjust the result.  */
 	  return expand_mult_highpart_adjust (mode, tem, op0, op1,
 					      target, unsignedp);
@@ -2955,25 +2959,52 @@ expand_mult_highpart (enum machine_mode mode, rtx op0,
     }
 
   return 0;
+}
 
- try:
-  /* Pass NULL_RTX as target since TARGET has wrong mode.  */
-  tem = expand_binop (wider_mode, moptab, op0, op1,
-		      NULL_RTX, unsignedp, OPTAB_WIDEN);
-  if (tem == 0)
-    return 0;
+/* Emit code to multiply OP0 and CNST1, putting the high half of the result
+   in TARGET if that is convenient, and return where the result is.  If the
+   operation cannot be performed, 0 is returned.
 
-  /* Extract the high half of the just generated product.  */
-  if (mode == word_mode)
-    {
-      return gen_highpart (mode, tem);
-    }
-  else
+   MODE is the mode of operation and result.
+
+   UNSIGNEDP nonzero means unsigned multiply.
+
+   MAX_COST is the total allowed cost for the expanded RTL.  */
+
+rtx
+expand_mult_highpart (enum machine_mode mode, rtx op0,
+		      unsigned HOST_WIDE_INT cnst1, rtx target,
+		      int unsignedp, int max_cost)
+{
+  enum machine_mode wider_mode;
+  enum mult_variant variant;
+  struct algorithm alg;
+  rtx op1, tem;
+
+  /* We can't support modes wider than HOST_BITS_PER_WIDE_INT.  */
+  if (GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
+    abort ();
+
+  op1 = gen_int_mode (cnst1, mode);
+
+  /* See whether shift/add multiplication is cheap enough.  */
+  if (choose_mult_variant (mode, cnst1, &alg, &variant)
+      && (alg.cost += shift_cost[GET_MODE_BITSIZE (mode) - 1]) < max_cost)
     {
-      tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
-			  build_int_2 (size, 0), NULL_RTX, 1);
-      return convert_modes (mode, wider_mode, tem, unsignedp);
+      /* See whether the specialized multiplication optabs are
+	 cheaper than the shift/add version.  */
+      tem = expand_mult_highpart_optab (mode, op0, op1, target,
+					unsignedp, alg.cost);
+      if (tem)
+	return tem;
+
+      wider_mode = GET_MODE_WIDER_MODE (mode);
+      op0 = convert_to_mode (wider_mode, op0, unsignedp);
+      tem = expand_mult_const (wider_mode, op0, cnst1, 0, &alg, variant);
+      return extract_high_half (mode, tem);
     }
+  return expand_mult_highpart_optab (mode, op0, op1, target,
+				     unsignedp, max_cost);
 }
 
 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
-- 
GitLab