diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-34.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-34.c
new file mode 100644
index 0000000000000000000000000000000000000000..9ac37c44336ab1dde6b7266b4bdd812d13b5797e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-34.c
@@ -0,0 +1,56 @@
+/* { dg-do compile } */
+/* { dg-options "-Os -fdump-tree-pre-stats -fdump-tree-optimized" } */
+
+void foo(void);
+static int c = 76, f, g;
+static int *h, *j, *k = &g;
+static int **i = &h;
+static short a;
+static signed char(l)(signed char b) {
+    if (!(((b) >= 77) && ((b) <= 77))) {
+        __builtin_unreachable();
+    }
+    return 0;
+}
+static short(m)(short d, short e) { return d + e; }
+static short n(signed char) {
+    j = *i;
+    if (j == 0)
+        ;
+    else
+        *i = 0;
+    *k = 0;
+    return 0;
+}
+static signed char o() {
+    l(0);
+    return 0;
+}
+static signed char p(int ad) {
+    a = m(!0, ad);
+    l(a);
+    if (f) {
+        *i &&n(o());
+        *i = 0;
+    } else
+        n(0);
+    if (h == &f || h == 0)
+        ;
+    else
+        foo();
+    return 0;
+}
+int main() {
+    p(c);
+    c = 8;
+}
+
+/* Even with main being cold we should eliminate the redundant load of h,
+   which is available on all incoming edges (though none of them is
+   considered worth optimizing for speed), since doing so does not
+   needlessly increase code size.  */
+
+/* { dg-final { scan-tree-dump "Insertions: 1" "pre" } } */
+/* { dg-final { scan-tree-dump "HOIST inserted: 1" "pre" } } */
+/* { dg-final { scan-tree-dump "Eliminated: 3" "pre" } } */
+/* { dg-final { scan-tree-dump-not "foo" "optimized" } } */
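
The reduced testcase above is hard to read on its own, so here is an
illustrative sketch of the situation the patch targets (not part of the
patch; the names g, sink and example are invented for illustration): a
load that is fully redundant on both incoming edges of a join, where one
edge supplies a constant and the other an already-loaded value, so the
PHI insertion pays off even when nothing on the path is optimized for
speed.

/* Illustrative sketch only -- not part of the patch.  */
extern int g;
static int sink;

void
example (int cond)
{
  if (cond)
    g = 0;        /* On this edge the value of a load of g is the constant 0.  */
  else
    sink = g;     /* On this edge g has already been loaded.  */
  sink += g;      /* This load of g is redundant on both incoming edges;
                     with the patch below, PRE may perform the PHI insertion
                     (at most one copy from the constant 0) even when no
                     incoming edge is optimized for speed, e.g. at -Os in a
                     cold function.  */
}

The dg-final directives above verify the pre dump statistics and that the
call to foo disappears from the optimized dump.
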
diff --git a/gcc/tree-ssa-pre.cc b/gcc/tree-ssa-pre.cc
index 0f2e458395c8bd3ad46d44eee581cbede6ff53a4..07fb165b2a89eea2c3fad8b89be208167a054c63 100644
--- a/gcc/tree-ssa-pre.cc
+++ b/gcc/tree-ssa-pre.cc
@@ -3314,6 +3314,8 @@ do_pre_regular_insertion (basic_block block, basic_block dom,
 	  bool by_some = false;
 	  bool cant_insert = false;
 	  bool all_same = true;
+	  unsigned num_inserts = 0;
+	  unsigned num_const = 0;
 	  pre_expr first_s = NULL;
 	  edge pred;
 	  basic_block bprime;
@@ -3370,11 +3372,14 @@ do_pre_regular_insertion (basic_block block, basic_block dom,
 		{
 		  avail[pred->dest_idx] = eprime;
 		  all_same = false;
+		  num_inserts++;
 		}
 	      else
 		{
 		  avail[pred->dest_idx] = edoubleprime;
 		  by_some = true;
+		  if (edoubleprime->kind == CONSTANT)
+		    num_const++;
 		  /* We want to perform insertions to remove a redundancy on
 		     a path in the CFG we want to optimize for speed.  */
 		  if (optimize_edge_for_speed_p (pred))
@@ -3391,6 +3396,12 @@ do_pre_regular_insertion (basic_block block, basic_block dom,
 	     partially redundant.  */
 	  if (!cant_insert && !all_same && by_some)
 	    {
+	      /* If the expression is redundant on all edges and at most one
+		 copy from a constant needs to be inserted, do the PHI
+		 insertion even when no incoming edge is optimized for
+		 speed.  */
+	      if (num_inserts == 0 && num_const <= 1)
+		do_insertion = true;
 	      if (!do_insertion)
 		{
 		  if (dump_file && (dump_flags & TDF_DETAILS))