diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 27dcc96db763bb4a22bb0ba1367ae77a66eba449..825208bf99a4eb834579e1ef315834ae1b3343c6 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@
+2004-06-12  Roger Sayle  <roger@eyesopen.com>
+
+	* expmed.c (shift_cost, shiftadd_cost, shiftsub_cost): Additionally
+	index by machine mode.
+	(init_expmed): Initialize shift_cost, shiftadd_cost and shiftsub_cost
+	tables inside the loop over machine modes.
+	(synth_mult, expand_mult_highpart_optab, expand_mult_highpart,
+	expand_divmod): Index shift*_cost by the appropriate machine mode.
+
 2004-06-12  Eric Christopher  <echristo@redhat.com>
 
 	* config/s390/s390.h: Rename TARGET_TPF to TARGET_TPF_PROFILING.
diff --git a/gcc/expmed.c b/gcc/expmed.c
index 00df7ce0c7be3c97a660ea1ab350a141e8b4ee30..f45de5d56cc9a0baaf6dcab5765b5c1617040bac 100644
--- a/gcc/expmed.c
+++ b/gcc/expmed.c
@@ -94,9 +94,9 @@ static int smod_pow2_cheap[NUM_MACHINE_MODES];
 static int zero_cost;
 static int add_cost[NUM_MACHINE_MODES];
 static int neg_cost[NUM_MACHINE_MODES];
-static int shift_cost[MAX_BITS_PER_WORD];
-static int shiftadd_cost[MAX_BITS_PER_WORD];
-static int shiftsub_cost[MAX_BITS_PER_WORD];
+static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
+static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
+static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 static int mul_cost[NUM_MACHINE_MODES];
 static int div_cost[NUM_MACHINE_MODES];
 static int mul_widen_cost[NUM_MACHINE_MODES];
@@ -106,38 +106,24 @@ void
 init_expmed (void)
 {
   rtx reg, shift_insn, shiftadd_insn, shiftsub_insn;
+  rtx shift_pat, shiftadd_pat, shiftsub_pat;
+  rtx pow2[MAX_BITS_PER_WORD];
+  rtx cint[MAX_BITS_PER_WORD];
   int dummy;
-  int m;
+  int m, n;
   enum machine_mode mode, wider_mode;
 
   start_sequence ();
 
-  /* This is "some random pseudo register" for purposes of calling recog
-     to see what insns exist.  */
-  reg = gen_rtx_REG (word_mode, 10000);
-
   zero_cost = rtx_cost (const0_rtx, 0);
 
-  shift_insn = emit_insn (gen_rtx_SET (VOIDmode, reg,
-				       gen_rtx_ASHIFT (word_mode, reg,
-						       const0_rtx)));
-
-  shiftadd_insn
-    = emit_insn (gen_rtx_SET (VOIDmode, reg,
-			      gen_rtx_PLUS (word_mode,
-					    gen_rtx_MULT (word_mode,
-							  reg, const0_rtx),
-					    reg)));
-
-  shiftsub_insn
-    = emit_insn (gen_rtx_SET (VOIDmode, reg,
-			      gen_rtx_MINUS (word_mode,
-					     gen_rtx_MULT (word_mode,
-							   reg, const0_rtx),
-					     reg)));
-
   init_recog ();
 
+  for (m = 1; m < MAX_BITS_PER_WORD; m++)
+    {
+      pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
+      cint[m] = GEN_INT (m);
+    }
 
   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
        mode != VOIDmode;
@@ -176,27 +162,52 @@ init_expmed (void)
 					   GEN_INT (GET_MODE_BITSIZE (mode)))),
 			SET);
 	}
-    }
 
-  shift_cost[0] = 0;
-  shiftadd_cost[0] = shiftsub_cost[0] = add_cost[word_mode];
-
-  for (m = 1; m < MAX_BITS_PER_WORD; m++)
-    {
-      rtx c_int = GEN_INT ((HOST_WIDE_INT) 1 << m);
-      shift_cost[m] = shiftadd_cost[m] = shiftsub_cost[m] = 32000;
-
-      XEXP (SET_SRC (PATTERN (shift_insn)), 1) = GEN_INT (m);
-      if (recog (PATTERN (shift_insn), shift_insn, &dummy) >= 0)
-	shift_cost[m] = rtx_cost (SET_SRC (PATTERN (shift_insn)), SET);
-
-      XEXP (XEXP (SET_SRC (PATTERN (shiftadd_insn)), 0), 1) = c_int;
-      if (recog (PATTERN (shiftadd_insn), shiftadd_insn, &dummy) >= 0)
-	shiftadd_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftadd_insn)), SET);
-
-      XEXP (XEXP (SET_SRC (PATTERN (shiftsub_insn)), 0), 1) = c_int;
-      if (recog (PATTERN (shiftsub_insn), shiftsub_insn, &dummy) >= 0)
-	shiftsub_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftsub_insn)), SET);
+	shift_insn = emit_insn (gen_rtx_SET (VOIDmode, reg,
+					     gen_rtx_ASHIFT (mode, reg,
+							     const0_rtx)));
+
+	shiftadd_insn
+	  = emit_insn (gen_rtx_SET (VOIDmode, reg,
+				    gen_rtx_PLUS (mode,
+						  gen_rtx_MULT (mode,
+								reg,
+								const0_rtx),
+						  reg)));
+
+	shiftsub_insn
+	  = emit_insn (gen_rtx_SET (VOIDmode, reg,
+				    gen_rtx_MINUS (mode,
+						   gen_rtx_MULT (mode,
+								 reg,
+								 const0_rtx),
+						   reg)));
+
+	shift_pat = PATTERN (shift_insn);
+	shiftadd_pat = PATTERN (shiftadd_insn);
+	shiftsub_pat = PATTERN (shiftsub_insn);
+
+	shift_cost[mode][0] = 0;
+	shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode];
+	/* Fill the whole row: counts of GET_MODE_BITSIZE (mode) or more keep
+	   the 32000 default instead of the zero left by static storage.  */
+	n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
+	for (m = 1; m < MAX_BITS_PER_WORD; m++)
+	  {
+	    shift_cost[mode][m] = 32000;
+	    shiftadd_cost[mode][m] = shiftsub_cost[mode][m] = 32000;
+	    if (m >= n)
+	      continue;
+	    XEXP (SET_SRC (shift_pat), 1) = cint[m];
+	    if (recog (shift_pat, shift_insn, &dummy) >= 0)
+	      shift_cost[mode][m] = rtx_cost (SET_SRC (shift_pat), SET);
+	    XEXP (XEXP (SET_SRC (shiftadd_pat), 0), 1) = pow2[m];
+	    if (recog (shiftadd_pat, shiftadd_insn, &dummy) >= 0)
+	      shiftadd_cost[mode][m] = rtx_cost (SET_SRC (shiftadd_pat), SET);
+	    XEXP (XEXP (SET_SRC (shiftsub_pat), 0), 1) = pow2[m];
+	    if (recog (shiftsub_pat, shiftsub_insn, &dummy) >= 0)
+	      shiftsub_cost[mode][m] = rtx_cost (SET_SRC (shiftsub_pat), SET);
+	  }
     }
 
   end_sequence ();
@@ -2226,7 +2237,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
       if (m < BITS_PER_WORD)
 	{
 	  q = t >> m;
-	  cost = shift_cost[m];
+	  cost = shift_cost[mode][m];
 	  synth_mult (alg_in, q, cost_limit - cost, mode);
 
 	  cost += alg_in->cost;
@@ -2310,9 +2321,9 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
       if (t % d == 0 && t > d && m < BITS_PER_WORD)
 	{
-	  cost = add_cost[mode] + shift_cost[m];
-	  if (shiftadd_cost[m] < cost)
-	    cost = shiftadd_cost[m];
+	  cost = add_cost[mode] + shift_cost[mode][m];
+	  if (shiftadd_cost[mode][m] < cost)
+	    cost = shiftadd_cost[mode][m];
 	  synth_mult (alg_in, t / d, cost_limit - cost, mode);
 
 	  cost += alg_in->cost;
@@ -2331,9 +2342,9 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
       if (t % d == 0 && t > d && m < BITS_PER_WORD)
 	{
-	  cost = add_cost[mode] + shift_cost[m];
-	  if (shiftsub_cost[m] < cost)
-	    cost = shiftsub_cost[m];
+	  cost = add_cost[mode] + shift_cost[mode][m];
+	  if (shiftsub_cost[mode][m] < cost)
+	    cost = shiftsub_cost[mode][m];
 	  synth_mult (alg_in, t / d, cost_limit - cost, mode);
 
 	  cost += alg_in->cost;
@@ -2358,7 +2369,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
       m = exact_log2 (q);
       if (m >= 0 && m < BITS_PER_WORD)
 	{
-	  cost = shiftadd_cost[m];
+	  cost = shiftadd_cost[mode][m];
 	  synth_mult (alg_in, (t - 1) >> m, cost_limit - cost, mode);
 
 	  cost += alg_in->cost;
@@ -2377,7 +2388,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
       m = exact_log2 (q);
       if (m >= 0 && m < BITS_PER_WORD)
 	{
-	  cost = shiftsub_cost[m];
+	  cost = shiftsub_cost[mode][m];
 	  synth_mult (alg_in, (t + 1) >> m, cost_limit - cost, mode);
 
 	  cost += alg_in->cost;
@@ -2911,7 +2922,7 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
      Need to adjust the result after the multiplication.  */
   if (size - 1 < BITS_PER_WORD
-      && (mul_highpart_cost[mode] + 2 * shift_cost[size-1]
+      && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1]
 	  + 4 * add_cost[mode] < max_cost))
     {
       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
@@ -2938,7 +2949,7 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
   moptab = smul_optab;
   if (smul_optab->handlers[wider_mode].insn_code != CODE_FOR_nothing
       && size - 1 < BITS_PER_WORD
-      && mul_cost[wider_mode] + shift_cost[size-1] < max_cost)
+      && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
     {
       tem = expand_binop (wider_mode, moptab, op0, op1, 0,
 			  unsignedp, OPTAB_WIDEN);
@@ -2950,7 +2961,7 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
   if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
       && size - 1 < BITS_PER_WORD
-      && (mul_widen_cost[wider_mode] + 2 * shift_cost[size-1]
+      && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
 	  + 4 * add_cost[mode] < max_cost))
     {
       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
@@ -3004,7 +3015,7 @@ expand_mult_highpart (enum machine_mode mode, rtx op0,
     return expand_mult_highpart_optab (mode, op0, op1, target,
 				       unsignedp, max_cost);
 
-  extra_cost = shift_cost[GET_MODE_BITSIZE (mode) - 1];
+  extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1];
 
   /* Check whether we try to multiply by a negative constant.  */
   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
@@ -3344,9 +3355,10 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
 			    if (post_shift - 1 >= BITS_PER_WORD)
 			      goto fail1;
 
-			    extra_cost = (shift_cost[post_shift - 1]
-					  + shift_cost[1]
-					  + 2 * add_cost[compute_mode]);
+			    extra_cost
+			      = (shift_cost[compute_mode][post_shift - 1]
+				 + shift_cost[compute_mode][1]
+				 + 2 * add_cost[compute_mode]);
 			    t1 = expand_mult_highpart (compute_mode, op0, ml,
 						       NULL_RTX, 1,
 						       max_cost - extra_cost);
@@ -3376,8 +3388,9 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
 			    t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
 					       build_int_2 (pre_shift, 0),
 					       NULL_RTX, 1);
-			    extra_cost = (shift_cost[pre_shift]
-					  + shift_cost[post_shift]);
+			    extra_cost
+			      = (shift_cost[compute_mode][pre_shift]
+				 + shift_cost[compute_mode][post_shift]);
 			    t2 = expand_mult_highpart (compute_mode, t1, ml,
 						       NULL_RTX, 1,
 						       max_cost - extra_cost);
@@ -3511,8 +3524,8 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
 			    || size - 1 >= BITS_PER_WORD)
 			  goto fail1;
 
-			extra_cost = (shift_cost[post_shift]
-				      + shift_cost[size - 1]
+			extra_cost = (shift_cost[compute_mode][post_shift]
+				      + shift_cost[compute_mode][size - 1]
 				      + add_cost[compute_mode]);
 			t1 = expand_mult_highpart (compute_mode, op0, ml,
 						   NULL_RTX, 0,
@@ -3543,8 +3556,8 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
 			  goto fail1;
 
 			ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
-			extra_cost = (shift_cost[post_shift]
-				      + shift_cost[size - 1]
+			extra_cost = (shift_cost[compute_mode][post_shift]
+				      + shift_cost[compute_mode][size - 1]
 				      + 2 * add_cost[compute_mode]);
 			t1 = expand_mult_highpart (compute_mode, op0, ml,
 						   NULL_RTX, 0,
@@ -3634,8 +3647,8 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
 					   NULL_RTX, 0);
 			t2 = expand_binop (compute_mode, xor_optab, op0, t1,
 					   NULL_RTX, 0, OPTAB_WIDEN);
-			extra_cost = (shift_cost[post_shift]
-				      + shift_cost[size - 1]
+			extra_cost = (shift_cost[compute_mode][post_shift]
+				      + shift_cost[compute_mode][size - 1]
 				      + 2 * add_cost[compute_mode]);
 			t3 = expand_mult_highpart (compute_mode, t2, ml,
 						   NULL_RTX, 1,