diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 95e3dc5047c5ea32cca663f0cf32dc0d9bb3d086..dee6da04808093071e30afe29892eb539a8a7ed1 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@
+2016-11-15  Richard Sandiford  <richard.sandiford@arm.com>
+	    Alan Hayward  <alan.hayward@arm.com>
+	    David Sherwood  <david.sherwood@arm.com>
+
+	* combine.c (simplify_shift_const_1): Use the number of bits
+	in the inner mode to determine the range of the shift.
+	When handling shifts of vectors, skip any rules that apply
+	only to scalars.
+
 2016-11-15  Richard Sandiford  <richard.sandiford@arm.com>
 	    Alan Hayward  <alan.hayward@arm.com>
 	    David Sherwood  <david.sherwood@arm.com>
diff --git a/gcc/combine.c b/gcc/combine.c
index 2de2a3ef8a273bbb819f2f2f17d6372db27a7fb9..3f197fa60a6bca306fdfd7521bdac52224e30e1c 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -10228,12 +10228,12 @@ simplify_shift_const_1 (enum rtx_code code, machine_mode result_mode,
      want to do this inside the loop as it makes it more difficult to
      combine shifts.  */
   if (SHIFT_COUNT_TRUNCATED)
-    orig_count &= GET_MODE_BITSIZE (mode) - 1;
+    orig_count &= GET_MODE_UNIT_BITSIZE (mode) - 1;
 
   /* If we were given an invalid count, don't do anything except exactly
      what was requested.  */
 
-  if (orig_count < 0 || orig_count >= (int) GET_MODE_PRECISION (mode))
+  if (orig_count < 0 || orig_count >= (int) GET_MODE_UNIT_PRECISION (mode))
     return NULL_RTX;
 
   count = orig_count;
@@ -10250,16 +10250,14 @@ simplify_shift_const_1 (enum rtx_code code, machine_mode result_mode,
       /* Convert ROTATERT to ROTATE.  */
       if (code == ROTATERT)
 	{
-	  unsigned int bitsize = GET_MODE_PRECISION (result_mode);
+	  unsigned int bitsize = GET_MODE_UNIT_PRECISION (result_mode);
 	  code = ROTATE;
-	  if (VECTOR_MODE_P (result_mode))
-	    count = bitsize / GET_MODE_NUNITS (result_mode) - count;
-	  else
-	    count = bitsize - count;
+	  count = bitsize - count;
 	}
 
       shift_mode = try_widen_shift_mode (code, varop, count, result_mode,
 					 mode, outer_op, outer_const);
+      machine_mode shift_unit_mode = GET_MODE_INNER (shift_mode);
 
       /* Handle cases where the count is greater than the size of the mode
 	 minus 1.  For ASHIFT, use the size minus one as the count (this can
@@ -10271,12 +10269,12 @@ simplify_shift_const_1 (enum rtx_code code, machine_mode result_mode,
 	 multiple operations, each of which are defined, we know what the
 	 result is supposed to be.  */
 
-      if (count > (GET_MODE_PRECISION (shift_mode) - 1))
+      if (count > (GET_MODE_PRECISION (shift_unit_mode) - 1))
 	{
 	  if (code == ASHIFTRT)
-	    count = GET_MODE_PRECISION (shift_mode) - 1;
+	    count = GET_MODE_PRECISION (shift_unit_mode) - 1;
 	  else if (code == ROTATE || code == ROTATERT)
-	    count %= GET_MODE_PRECISION (shift_mode);
+	    count %= GET_MODE_PRECISION (shift_unit_mode);
 	  else
 	    {
 	      /* We can't simply return zero because there may be an
@@ -10292,44 +10290,49 @@ simplify_shift_const_1 (enum rtx_code code, machine_mode result_mode,
       if (complement_p)
 	break;
 
-      /* An arithmetic right shift of a quantity known to be -1 or 0
-	 is a no-op.  */
-      if (code == ASHIFTRT
-	  && (num_sign_bit_copies (varop, shift_mode)
-	      == GET_MODE_PRECISION (shift_mode)))
+      if (shift_mode == shift_unit_mode)
 	{
-	  count = 0;
-	  break;
-	}
+	  /* An arithmetic right shift of a quantity known to be -1 or 0
+	     is a no-op.  */
+	  if (code == ASHIFTRT
+	      && (num_sign_bit_copies (varop, shift_unit_mode)
+		  == GET_MODE_PRECISION (shift_unit_mode)))
+	    {
+	      count = 0;
+	      break;
+	    }
 
-      /* If we are doing an arithmetic right shift and discarding all but
-	 the sign bit copies, this is equivalent to doing a shift by the
-	 bitsize minus one.  Convert it into that shift because it will often
-	 allow other simplifications.  */
-
-      if (code == ASHIFTRT
-	  && (count + num_sign_bit_copies (varop, shift_mode)
-	      >= GET_MODE_PRECISION (shift_mode)))
-	count = GET_MODE_PRECISION (shift_mode) - 1;
-
-      /* We simplify the tests below and elsewhere by converting
-	 ASHIFTRT to LSHIFTRT if we know the sign bit is clear.
-	 `make_compound_operation' will convert it to an ASHIFTRT for
-	 those machines (such as VAX) that don't have an LSHIFTRT.  */
-      if (code == ASHIFTRT
-	  && val_signbit_known_clear_p (shift_mode,
-					nonzero_bits (varop, shift_mode)))
-	code = LSHIFTRT;
-
-      if (((code == LSHIFTRT
-	    && HWI_COMPUTABLE_MODE_P (shift_mode)
-	    && !(nonzero_bits (varop, shift_mode) >> count))
-	   || (code == ASHIFT
-	       && HWI_COMPUTABLE_MODE_P (shift_mode)
-	       && !((nonzero_bits (varop, shift_mode) << count)
-		    & GET_MODE_MASK (shift_mode))))
-	  && !side_effects_p (varop))
-	varop = const0_rtx;
+	  /* If we are doing an arithmetic right shift and discarding all but
+	     the sign bit copies, this is equivalent to doing a shift by the
+	     bitsize minus one.  Convert it into that shift because it will
+	     often allow other simplifications.  */
+
+	  if (code == ASHIFTRT
+	      && (count + num_sign_bit_copies (varop, shift_unit_mode)
+		  >= GET_MODE_PRECISION (shift_unit_mode)))
+	    count = GET_MODE_PRECISION (shift_unit_mode) - 1;
+
+	  /* We simplify the tests below and elsewhere by converting
+	     ASHIFTRT to LSHIFTRT if we know the sign bit is clear.
+	     `make_compound_operation' will convert it to an ASHIFTRT for
+	     those machines (such as VAX) that don't have an LSHIFTRT.  */
+	  if (code == ASHIFTRT
+	      && HWI_COMPUTABLE_MODE_P (shift_unit_mode)
+	      && val_signbit_known_clear_p (shift_unit_mode,
+					    nonzero_bits (varop,
+							  shift_unit_mode)))
+	    code = LSHIFTRT;
+
+	  if (((code == LSHIFTRT
+		&& HWI_COMPUTABLE_MODE_P (shift_unit_mode)
+		&& !(nonzero_bits (varop, shift_unit_mode) >> count))
+	       || (code == ASHIFT
+		   && HWI_COMPUTABLE_MODE_P (shift_unit_mode)
+		   && !((nonzero_bits (varop, shift_unit_mode) << count)
+			& GET_MODE_MASK (shift_unit_mode))))
+	      && !side_effects_p (varop))
+	    varop = const0_rtx;
+	}
 
       switch (GET_CODE (varop))
 	{
@@ -10346,6 +10349,10 @@ simplify_shift_const_1 (enum rtx_code code, machine_mode result_mode,
 	  break;
 
 	case MEM:
+	  /* The following rules apply only to scalars.  */
+	  if (shift_mode != shift_unit_mode)
+	    break;
+
 	  /* If we have (xshiftrt (mem ...) C) and C is MODE_WIDTH
 	     minus the width of a smaller mode, we can do this with a
 	     SIGN_EXTEND or ZERO_EXTEND from the narrower memory location.  */
@@ -10368,6 +10375,10 @@ simplify_shift_const_1 (enum rtx_code code, machine_mode result_mode,
 	  break;
 
 	case SUBREG:
+	  /* The following rules apply only to scalars.  */
+	  if (shift_mode != shift_unit_mode)
+	    break;
+
 	  /* If VAROP is a SUBREG, strip it as long as the inner operand has
 	     the same number of words as what we've seen so far.  Then store
 	     the widest mode in MODE.  */
@@ -10424,9 +10435,9 @@ simplify_shift_const_1 (enum rtx_code code, machine_mode result_mode,
 	     interpreted as the sign bit in a narrower mode, so, if
 	     the result is narrower, don't discard the shift.  */
 	  if (code == LSHIFTRT
-	      && count == (GET_MODE_BITSIZE (result_mode) - 1)
-	      && (GET_MODE_BITSIZE (result_mode)
-		  >= GET_MODE_BITSIZE (GET_MODE (varop))))
+	      && count == (GET_MODE_UNIT_BITSIZE (result_mode) - 1)
+	      && (GET_MODE_UNIT_BITSIZE (result_mode)
+		  >= GET_MODE_UNIT_BITSIZE (GET_MODE (varop))))
 	    {
 	      varop = XEXP (varop, 0);
 	      continue;
@@ -10437,14 +10448,17 @@ simplify_shift_const_1 (enum rtx_code code, machine_mode result_mode,
 	case LSHIFTRT:
 	case ASHIFT:
 	case ROTATE:
+	  /* The following rules apply only to scalars.  */
+	  if (shift_mode != shift_unit_mode)
+	    break;
+
 	  /* Here we have two nested shifts.  The result is usually the
 	     AND of a new shift with a mask.  We compute the result below.  */
 	  if (CONST_INT_P (XEXP (varop, 1))
 	      && INTVAL (XEXP (varop, 1)) >= 0
 	      && INTVAL (XEXP (varop, 1)) < GET_MODE_PRECISION (GET_MODE (varop))
 	      && HWI_COMPUTABLE_MODE_P (result_mode)
-	      && HWI_COMPUTABLE_MODE_P (mode)
-	      && !VECTOR_MODE_P (result_mode))
+	      && HWI_COMPUTABLE_MODE_P (mode))
 	    {
 	      enum rtx_code first_code = GET_CODE (varop);
 	      unsigned int first_count = INTVAL (XEXP (varop, 1));
@@ -10610,7 +10624,8 @@ simplify_shift_const_1 (enum rtx_code code, machine_mode result_mode,
 	  break;
 
 	case NOT:
-	  if (VECTOR_MODE_P (mode))
+	  /* The following rules apply only to scalars.  */
+	  if (shift_mode != shift_unit_mode)
 	    break;
 
 	  /* Make this fit the case below.  */
@@ -10620,6 +10635,10 @@ simplify_shift_const_1 (enum rtx_code code, machine_mode result_mode,
 	case IOR:
 	case AND:
 	case XOR:
+	  /* The following rules apply only to scalars.  */
+	  if (shift_mode != shift_unit_mode)
+	    break;
+
 	  /* If we have (xshiftrt (ior (plus X (const_int -1)) X) C)
 	     with C the size of VAROP - 1 and the shift is logical if
 	     STORE_FLAG_VALUE is 1 and arithmetic if STORE_FLAG_VALUE is -1,
@@ -10696,6 +10715,10 @@ simplify_shift_const_1 (enum rtx_code code, machine_mode result_mode,
 	  break;
 
 	case EQ:
+	  /* The following rules apply only to scalars.  */
+	  if (shift_mode != shift_unit_mode)
+	    break;
+
 	  /* Convert (lshiftrt (eq FOO 0) C) to (xor FOO 1) if STORE_FLAG_VALUE
 	     says that the sign bit can be tested, FOO has mode MODE, C is
 	     GET_MODE_PRECISION (MODE) - 1, and FOO has only its low-order bit
@@ -10717,6 +10740,10 @@ simplify_shift_const_1 (enum rtx_code code, machine_mode result_mode,
 	  break;
 
 	case NEG:
+	  /* The following rules apply only to scalars.  */
+	  if (shift_mode != shift_unit_mode)
+	    break;
+
 	  /* (lshiftrt (neg A) C) where A is either 0 or 1 and C is one less
 	     than the number of bits in the mode is equivalent to A.  */
 	  if (code == LSHIFTRT
@@ -10740,6 +10767,10 @@ simplify_shift_const_1 (enum rtx_code code, machine_mode result_mode,
 	  break;
 
 	case PLUS:
+	  /* The following rules apply only to scalars.  */
+	  if (shift_mode != shift_unit_mode)
+	    break;
+
 	  /* (lshiftrt (plus A -1) C) where A is either 0 or 1 and C
 	     is one less than the number of bits in the mode is
 	     equivalent to (xor A 1).  */
@@ -10821,6 +10852,10 @@ simplify_shift_const_1 (enum rtx_code code, machine_mode result_mode,
 	  break;
 
 	case MINUS:
+	  /* The following rules apply only to scalars.  */
+	  if (shift_mode != shift_unit_mode)
+	    break;
+
 	  /* If we have (xshiftrt (minus (ashiftrt X C)) X) C)
 	     with C the size of VAROP - 1 and the shift is logical if
 	     STORE_FLAG_VALUE is 1 and arithmetic if STORE_FLAG_VALUE is -1,
@@ -10854,8 +10889,8 @@ simplify_shift_const_1 (enum rtx_code code, machine_mode result_mode,
 	      && GET_CODE (XEXP (varop, 0)) == LSHIFTRT
 	      && CONST_INT_P (XEXP (XEXP (varop, 0), 1))
 	      && (INTVAL (XEXP (XEXP (varop, 0), 1))
-		  >= (GET_MODE_PRECISION (GET_MODE (XEXP (varop, 0)))
-		      - GET_MODE_PRECISION (GET_MODE (varop)))))
+		  >= (GET_MODE_UNIT_PRECISION (GET_MODE (XEXP (varop, 0)))
+		      - GET_MODE_UNIT_PRECISION (GET_MODE (varop)))))
 	    {
 	      rtx varop_inner = XEXP (varop, 0);