diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 66d7d86ba2f9d936f0fdd73af0ecb0faa94d5b9c..1358d4b3542e7adb03b77a621568a7aa8c15c6db 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2019-09-25  Richard Biener  <rguenther@suse.de>
+
+	PR tree-optimization/91896
+	* tree-vect-loop.c (vectorizable_reduction): The single
+	def-use cycle optimization cannot apply when there's more
+	than one pattern stmt involved.
+
 2019-09-26  Richard Biener  <rguenther@suse.de>
 
 	* tree-vect-loop.c (vect_analyze_loop_operations): Analyze
diff --git a/gcc/expr.c b/gcc/expr.c
index 2f2b53f8b6905013b4214eea137d67c666b0c795..7a7070670ed86b55c9f6256bcfbabe54ea69109e 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -7230,12 +7230,13 @@ get_inner_reference (tree exp, poly_int64_pod *pbitsize,
       *punsignedp = (! INTEGRAL_TYPE_P (TREE_TYPE (exp))
 		     || TYPE_UNSIGNED (TREE_TYPE (exp)));
 
-      /* For vector types, with the correct size of access, use the mode of
-	 inner type.  */
-      if (TREE_CODE (TREE_TYPE (TREE_OPERAND (exp, 0))) == VECTOR_TYPE
-	  && TREE_TYPE (exp) == TREE_TYPE (TREE_TYPE (TREE_OPERAND (exp, 0)))
-	  && tree_int_cst_equal (size_tree, TYPE_SIZE (TREE_TYPE (exp))))
-        mode = TYPE_MODE (TREE_TYPE (exp));
+      /* For vector element types with the correct size of access or for
+         vector typed accesses, use the mode of the access type.  */
+      if ((TREE_CODE (TREE_TYPE (TREE_OPERAND (exp, 0))) == VECTOR_TYPE
+	   && TREE_TYPE (exp) == TREE_TYPE (TREE_TYPE (TREE_OPERAND (exp, 0)))
+	   && tree_int_cst_equal (size_tree, TYPE_SIZE (TREE_TYPE (exp))))
+	  || VECTOR_TYPE_P (TREE_TYPE (exp)))
+	mode = TYPE_MODE (TREE_TYPE (exp));
     }
   else
     {
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 8d75c29feb1f772f1c1187520588b2cd566edbb5..19ad3936b64b06b32db928dc62aba7eb23ab673b 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2019-09-26  Richard Biener  <rguenther@suse.de>
+
+	PR middle-end/91897
+	* gcc.target/i386/pr91897.c: New testcase.
+
 2019-09-26  Martin Sebor  <msebor@redhat.com>
 
 	PR tree-optimization/91914
diff --git a/gcc/testsuite/gcc.target/i386/pr91897.c b/gcc/testsuite/gcc.target/i386/pr91897.c
new file mode 100644
index 0000000000000000000000000000000000000000..0615ad2fdcafd49fbc331917964b6eb2082a27a4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr91897.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx" } */
+
+typedef double Double16 __attribute__((vector_size(8*16)));
+
+void mult(Double16 *res, const Double16 *v1, const Double16 *v2)
+{
+  *res = *v1 * *v2;
+}
+
+/* Double16 spans four ymm registers: want 4 ymm loads and 4 ymm stores.  */
+/* { dg-final { scan-assembler-times "movapd" 8 } } */