From 296799ba06ab9ff273038e9b4dd93ca9ba65bf1c Mon Sep 17 00:00:00 2001
From: Georg-Johann Lay <avr@gjlay.de>
Date: Thu, 28 Jul 2011 08:03:07 +0000
Subject: [PATCH] re PR target/49687 ([avr] Missed optimization for widening
 MUL)

	PR target/49687
	* config/avr/t-avr (LIB1ASMFUNCS): Remove _xmulhisi3_exit.
	Add _muluhisi3, _mulshisi3, _usmulhisi3.
	* config/avr/libgcc.S (__mulsi3): Rewrite.
	(__mulhisi3): Rewrite.
	(__umulhisi3): Rewrite.
	(__usmulhisi3): New.
	(__muluhisi3): New.
	(__mulshisi3): New.
	(__mulohisi3): New.
	(__mulqi3, __mulqihi3, __umulqihi3, __mulhi3): Use DEFUN/ENDF to
	declare.
	* config/avr/predicates.md (pseudo_register_operand): Rewrite.
	(pseudo_register_or_const_int_operand): New.
	(combine_pseudo_register_operand): New.
	(u16_operand): New.
	(s16_operand): New.
	(o16_operand): New.
	* config/avr/avr.c (avr_rtx_costs): Handle costs for mult:SI.
	* config/avr/avr.md (QIHI, QIHI2): New mode iterators.
	(any_extend, any_extend2): New code iterators.
	(extend_prefix): New code attribute.
	(mulsi3): Rewrite. Turn insn to expander.
	(mulhisi3): Ditto.
	(umulhisi3): Ditto.
	(usmulhisi3): New expander.
	(*mulsi3): New insn-and-split.
	(mulu<mode>si3): New insn-and-split.
	(muls<mode>si3): New insn-and-split.
	(mulohisi3): New insn-and-split.
	(*uumulqihisi3, *uumulhiqisi3, *uumulhihisi3, *uumulqiqisi3,
	*usmulqihisi3, *usmulhiqisi3, *usmulhihisi3, *usmulqiqisi3,
	*sumulqihisi3, *sumulhiqisi3, *sumulhihisi3, *sumulqiqisi3,
	*ssmulqihisi3, *ssmulhiqisi3, *ssmulhihisi3, *ssmulqiqisi3): New
	insn-and-split.
	(*mulsi3_call): Rewrite.
	(*mulhisi3_call): Rewrite.
	(*umulhisi3_call): Rewrite.
	(*usmulhisi3_call): New insn.
	(*muluhisi3_call): New insn.
	(*mulshisi3_call): New insn.
	(*mulohisi3_call): New insn.
	(extendqihi2): Use combine_pseudo_register_operand as predicate
	for operand 1.
	(extendqisi2): Ditto.
	(zero_extendqihi2): Ditto.
	(zero_extendqisi2): Ditto.
	(zero_extendhisi2): Ditto.
	(extendhisi2): Ditto. Don't early-clobber operand 0.

From-SVN: r176862
---
 gcc/ChangeLog                |  52 +++++
 gcc/config/avr/avr.c         |  28 +++
 gcc/config/avr/avr.md        | 349 ++++++++++++++++++++++++++----
 gcc/config/avr/libgcc.S      | 403 ++++++++++++++++++-----------------
 gcc/config/avr/predicates.md |  43 +++-
 gcc/config/avr/t-avr         |   4 +-
 6 files changed, 637 insertions(+), 242 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 2245872ca9c3..3ac229fdc8a2 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,55 @@
+2011-07-28  Georg-Johann Lay  <avr@gjlay.de>
+	
+	PR target/49687
+	* config/avr/t-avr (LIB1ASMFUNCS): Remove _xmulhisi3_exit.
+	Add _muluhisi3, _mulshisi3, _usmulhisi3.
+	* config/avr/libgcc.S (__mulsi3): Rewrite.
+	(__mulhisi3): Rewrite.
+	(__umulhisi3): Rewrite.
+	(__usmulhisi3): New.
+	(__muluhisi3): New.
+	(__mulshisi3): New.
+	(__mulohisi3): New.
+	(__mulqi3, __mulqihi3, __umulqihi3, __mulhi3): Use DEFUN/ENDF to
+	declare.
+	* config/avr/predicates.md (pseudo_register_operand): Rewrite.
+	(pseudo_register_or_const_int_operand): New.
+	(combine_pseudo_register_operand): New.
+	(u16_operand): New.
+	(s16_operand): New.
+	(o16_operand): New.
+	* config/avr/avr.c (avr_rtx_costs): Handle costs for mult:SI.
+	* config/avr/avr.md (QIHI, QIHI2): New mode iterators.
+	(any_extend, any_extend2): New code iterators.
+	(extend_prefix): New code attribute.
+	(mulsi3): Rewrite. Turn insn to expander.
+	(mulhisi3): Ditto.
+	(umulhisi3): Ditto.
+	(usmulhisi3): New expander.
+	(*mulsi3): New insn-and-split.
+	(mulu<mode>si3): New insn-and-split.
+	(muls<mode>si3): New insn-and-split.
+	(mulohisi3): New insn-and-split.
+	(*uumulqihisi3, *uumulhiqisi3, *uumulhihisi3, *uumulqiqisi3,
+	*usmulqihisi3, *usmulhiqisi3, *usmulhihisi3, *usmulqiqisi3,
+	*sumulqihisi3, *sumulhiqisi3, *sumulhihisi3, *sumulqiqisi3,
+	*ssmulqihisi3, *ssmulhiqisi3, *ssmulhihisi3, *ssmulqiqisi3): New
+	insn-and-split.
+	(*mulsi3_call): Rewrite.
+	(*mulhisi3_call): Rewrite.
+	(*umulhisi3_call): Rewrite.
+	(*usmulhisi3_call): New insn.
+	(*muluhisi3_call): New insn.
+	(*mulshisi3_call): New insn.
+	(*mulohisi3_call): New insn.
+	(extendqihi2): Use combine_pseudo_register_operand as predicate
+	for operand 1.
+	(extendqisi2): Ditto.
+	(zero_extendqihi2): Ditto.
+	(zero_extendqisi2): Ditto.
+	(zero_extendhisi2): Ditto.
+	(extendhisi2): Ditto. Don't early-clobber operand 0.
+
 2011-07-28  Uros Bizjak  <ubizjak@gmail.com>
 
 	* config/i386/i386.c (add->lea splitter): Add SWI mode to PLUS RTX.
diff --git a/gcc/config/avr/avr.c b/gcc/config/avr/avr.c
index eb1707e922ba..d9ed22489479 100644
--- a/gcc/config/avr/avr.c
+++ b/gcc/config/avr/avr.c
@@ -5515,6 +5515,34 @@ avr_rtx_costs (rtx x, int codearg, int outer_code ATTRIBUTE_UNUSED, int *total,
 	    return false;
 	  break;
 
+	case SImode:
+	  if (AVR_HAVE_MUL)
+            {
+              if (!speed)
+                {
+                  /* Add some additional costs besides CALL like moves etc.  */
+
+                  *total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 5 : 4);
+                }
+              else
+                {
+                  /* Just a rough estimate.  Even with -O2 we don't want bulky
+                     code expanded inline.  */
+
+                  *total = COSTS_N_INSNS (25);
+                }
+            }
+          else
+            {
+              if (speed)
+                *total = COSTS_N_INSNS (300);
+              else
+                /* Add some additional costs besides CALL like moves etc.  */
+                *total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 5 : 4);
+            }
+          
+          return true;
+          
 	default:
 	  return false;
 	}
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index 66c3db279c5c..3f3bb6a2b5b1 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -127,12 +127,25 @@
 		       (const_int 2))]
         (const_int 2)))
 
-;; Define mode iterator
+;; Define mode iterators
+(define_mode_iterator QIHI  [(QI "") (HI "")])
+(define_mode_iterator QIHI2 [(QI "") (HI "")])
 (define_mode_iterator QISI [(QI "") (HI "") (SI "")])
 (define_mode_iterator QIDI [(QI "") (HI "") (SI "") (DI "")])
 (define_mode_iterator HIDI [(HI "") (SI "") (DI "")])
 (define_mode_iterator HISI [(HI "") (SI "")])
 
+;; Define code iterators
+;; Define two incarnations so that we can build the cross product.
+(define_code_iterator any_extend  [sign_extend zero_extend])
+(define_code_iterator any_extend2 [sign_extend zero_extend])
+
+;; Define code attributes
+(define_code_attr extend_prefix
+  [(sign_extend "s")
+   (zero_extend "u")])
+
+
 ;;========================================================================
 ;; The following is used by nonlocal_goto and setjmp.
 ;; The receiver pattern will create no instructions since internally
@@ -1350,69 +1363,310 @@
 
 ;; Operand 2 (reg:SI 18) not clobbered on the enhanced core.
 ;; All call-used registers clobbered otherwise - normal library call.
+;;    To support widening multiplication with constant we postpone
+;; expanding to the implicit library call until post combine and
+;; prior to register allocation.  Clobber all hard registers that
+;; might be used by the (widening) multiply until it is split and
+;; its final register footprint is worked out.
+
 (define_expand "mulsi3"
-  [(set (reg:SI 22) (match_operand:SI 1 "register_operand" ""))
-   (set (reg:SI 18) (match_operand:SI 2 "register_operand" ""))
-   (parallel [(set (reg:SI 22) (mult:SI (reg:SI 22) (reg:SI 18)))
-	      (clobber (reg:HI 26))
-	      (clobber (reg:HI 30))])
-   (set (match_operand:SI 0 "register_operand" "") (reg:SI 22))]
+  [(parallel [(set (match_operand:SI 0 "register_operand" "")
+                   (mult:SI (match_operand:SI 1 "register_operand" "")
+                            (match_operand:SI 2 "nonmemory_operand" "")))
+              (clobber (reg:DI 18))])]
   "AVR_HAVE_MUL"
-  "")
+  {
+    if (u16_operand (operands[2], SImode))
+      {
+        operands[2] = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode));
+        emit_insn (gen_muluhisi3 (operands[0], operands[2], operands[1]));
+        DONE;
+      }
 
-(define_insn "*mulsi3_call"
-  [(set (reg:SI 22) (mult:SI (reg:SI 22) (reg:SI 18)))
-   (clobber (reg:HI 26))
-   (clobber (reg:HI 30))]
-  "AVR_HAVE_MUL"
-  "%~call __mulsi3"
-  [(set_attr "type" "xcall")
-   (set_attr "cc" "clobber")])
+    if (o16_operand (operands[2], SImode))
+      {
+        operands[2] = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode));
+        emit_insn (gen_mulohisi3 (operands[0], operands[2], operands[1]));
+        DONE;
+      }
+  })
 
-(define_expand "mulhisi3"
-  [(set (reg:HI 18)
-        (match_operand:HI 1 "register_operand" ""))
-   (set (reg:HI 20)
-        (match_operand:HI 2 "register_operand" ""))
+(define_insn_and_split "*mulsi3"
+  [(set (match_operand:SI 0 "pseudo_register_operand"                      "=r")
+        (mult:SI (match_operand:SI 1 "pseudo_register_operand"              "r")
+                 (match_operand:SI 2 "pseudo_register_or_const_int_operand" "rn")))
+   (clobber (reg:DI 18))]
+  "AVR_HAVE_MUL && !reload_completed"
+  { gcc_unreachable(); }
+  "&& 1"
+  [(set (reg:SI 18)
+        (match_dup 1))
    (set (reg:SI 22) 
-        (mult:SI (sign_extend:SI (reg:HI 18))
-                 (sign_extend:SI (reg:HI 20))))
-   (set (match_operand:SI 0 "register_operand" "") 
+        (match_dup 2))
+   (parallel [(set (reg:SI 22)
+                   (mult:SI (reg:SI 22)
+                            (reg:SI 18)))
+              (clobber (reg:HI 26))])
+   (set (match_dup 0)
+        (reg:SI 22))]
+  {
+    if (u16_operand (operands[2], SImode))
+      {
+        operands[2] = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode));
+        emit_insn (gen_muluhisi3 (operands[0], operands[2], operands[1]));
+        DONE;
+      }
+
+    if (o16_operand (operands[2], SImode))
+      {
+        operands[2] = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode));
+        emit_insn (gen_mulohisi3 (operands[0], operands[2], operands[1]));
+        DONE;
+      }
+  })
+
+;; "muluqisi3"
+;; "muluhisi3"
+(define_insn_and_split "mulu<mode>si3"
+  [(set (match_operand:SI 0 "pseudo_register_operand"                           "=r")
+        (mult:SI (zero_extend:SI (match_operand:QIHI 1 "pseudo_register_operand" "r"))
+                 (match_operand:SI 2 "pseudo_register_or_const_int_operand"      "rn")))
+   (clobber (reg:DI 18))]
+  "AVR_HAVE_MUL && !reload_completed"
+  { gcc_unreachable(); }
+  "&& 1"
+  [(set (reg:HI 26)
+        (match_dup 1))
+   (set (reg:SI 18)
+        (match_dup 2))
+   (set (reg:SI 22)
+        (mult:SI (zero_extend:SI (reg:HI 26))
+                 (reg:SI 18)))
+   (set (match_dup 0)
+        (reg:SI 22))]
+  {
+    /* Do the QI -> HI extension explicitly before the multiplication.  */
+    /* Do the HI -> SI extension implicitly and after the multiplication.  */
+       
+    if (QImode == <MODE>mode)
+      operands[1] = gen_rtx_ZERO_EXTEND (HImode, operands[1]);
+
+    if (u16_operand (operands[2], SImode))
+      {
+        operands[1] = force_reg (HImode, operands[1]);
+        operands[2] = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode));
+        emit_insn (gen_umulhisi3 (operands[0], operands[1], operands[2]));
+        DONE;
+      }
+  })
+
+;; "mulsqisi3"
+;; "mulshisi3"
+(define_insn_and_split "muls<mode>si3"
+  [(set (match_operand:SI 0 "pseudo_register_operand"                           "=r")
+        (mult:SI (sign_extend:SI (match_operand:QIHI 1 "pseudo_register_operand" "r"))
+                 (match_operand:SI 2 "pseudo_register_or_const_int_operand"      "rn")))
+   (clobber (reg:DI 18))]
+  "AVR_HAVE_MUL && !reload_completed"
+  { gcc_unreachable(); }
+  "&& 1"
+  [(set (reg:HI 26)
+        (match_dup 1))
+   (set (reg:SI 18)
+        (match_dup 2))
+   (set (reg:SI 22)
+        (mult:SI (sign_extend:SI (reg:HI 26))
+                 (reg:SI 18)))
+   (set (match_dup 0)
         (reg:SI 22))]
+  {
+    /* Do the QI -> HI extension explicitly before the multiplication.  */
+    /* Do the HI -> SI extension implicitly and after the multiplication.  */
+       
+    if (QImode == <MODE>mode)
+      operands[1] = gen_rtx_SIGN_EXTEND (HImode, operands[1]);
+
+    if (u16_operand (operands[2], SImode)
+        || s16_operand (operands[2], SImode))
+      {
+        rtx xop2 = force_reg (HImode, gen_int_mode (INTVAL (operands[2]), HImode));
+
+        operands[1] = force_reg (HImode, operands[1]);
+
+        if (u16_operand (operands[2], SImode))
+          emit_insn (gen_usmulhisi3 (operands[0], xop2, operands[1]));
+        else
+          emit_insn (gen_mulhisi3 (operands[0], operands[1], xop2));
+
+        DONE;
+      }
+  })
+
+;; One-extend operand 1
+
+(define_insn_and_split "mulohisi3"
+  [(set (match_operand:SI 0 "pseudo_register_operand"                          "=r")
+        (mult:SI (not:SI (zero_extend:SI 
+                          (not:HI (match_operand:HI 1 "pseudo_register_operand" "r"))))
+                 (match_operand:SI 2 "pseudo_register_or_const_int_operand"     "rn")))
+   (clobber (reg:DI 18))]
+  "AVR_HAVE_MUL && !reload_completed"
+  { gcc_unreachable(); }
+  "&& 1"
+  [(set (reg:HI 26)
+        (match_dup 1))
+   (set (reg:SI 18)
+        (match_dup 2))
+   (set (reg:SI 22)
+        (mult:SI (not:SI (zero_extend:SI (not:HI (reg:HI 26))))
+                 (reg:SI 18)))
+   (set (match_dup 0)
+        (reg:SI 22))]
+  "")
+
+(define_expand "mulhisi3"
+  [(parallel [(set (match_operand:SI 0 "register_operand" "")
+                   (mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand" ""))
+                            (sign_extend:SI (match_operand:HI 2 "register_operand" ""))))
+              (clobber (reg:DI 18))])]
   "AVR_HAVE_MUL"
   "")
 
 (define_expand "umulhisi3"
+  [(parallel [(set (match_operand:SI 0 "register_operand" "")
+                   (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" ""))
+                            (zero_extend:SI (match_operand:HI 2 "register_operand" ""))))
+              (clobber (reg:DI 18))])]
+  "AVR_HAVE_MUL"
+  "")
+
+(define_expand "usmulhisi3"
+  [(parallel [(set (match_operand:SI 0 "register_operand" "")
+                   (mult:SI (zero_extend:SI (match_operand:HI 1 "register_operand" ""))
+                            (sign_extend:SI (match_operand:HI 2 "register_operand" ""))))
+              (clobber (reg:DI 18))])]
+  "AVR_HAVE_MUL"
+  "")
+
+;; "*uumulqihisi3" "*uumulhiqisi3" "*uumulhihisi3" "*uumulqiqisi3"
+;; "*usmulqihisi3" "*usmulhiqisi3" "*usmulhihisi3" "*usmulqiqisi3"
+;; "*sumulqihisi3" "*sumulhiqisi3" "*sumulhihisi3" "*sumulqiqisi3"
+;; "*ssmulqihisi3" "*ssmulhiqisi3" "*ssmulhihisi3" "*ssmulqiqisi3"
+(define_insn_and_split
+  "*<any_extend:extend_prefix><any_extend2:extend_prefix>mul<QIHI:mode><QIHI2:mode>si3"
+  [(set (match_operand:SI 0 "pseudo_register_operand"                            "=r")
+        (mult:SI (any_extend:SI (match_operand:QIHI 1 "pseudo_register_operand"   "r"))
+                 (any_extend2:SI (match_operand:QIHI2 2 "pseudo_register_operand" "r"))))
+   (clobber (reg:DI 18))]
+  "AVR_HAVE_MUL && !reload_completed"
+  { gcc_unreachable(); }
+  "&& 1"
   [(set (reg:HI 18)
-        (match_operand:HI 1 "register_operand" ""))
-   (set (reg:HI 20)
-        (match_operand:HI 2 "register_operand" ""))
-   (set (reg:SI 22) 
-        (mult:SI (zero_extend:SI (reg:HI 18))
-                 (zero_extend:SI (reg:HI 20))))
-   (set (match_operand:SI 0 "register_operand" "") 
+        (match_dup 1))
+   (set (reg:HI 26)
+        (match_dup 2))
+   (set (reg:SI 22)
+        (mult:SI (match_dup 3)
+                 (match_dup 4)))
+   (set (match_dup 0)
         (reg:SI 22))]
+  {
+    rtx xop1 = operands[1];
+    rtx xop2 = operands[2];
+
+    /* Do the QI -> HI extension explicitly before the multiplication.  */
+    /* Do the HI -> SI extension implicitly and after the multiplication.  */
+       
+    if (QImode == <QIHI:MODE>mode)
+      xop1 = gen_rtx_fmt_e (<any_extend:CODE>, HImode, xop1);
+
+    if (QImode == <QIHI2:MODE>mode)
+      xop2 = gen_rtx_fmt_e (<any_extend2:CODE>, HImode, xop2);
+
+    if (<any_extend:CODE> == <any_extend2:CODE>
+        || <any_extend:CODE> == ZERO_EXTEND)
+      {
+        operands[1] = xop1;
+        operands[2] = xop2;
+        operands[3] = gen_rtx_fmt_e (<any_extend:CODE>, SImode, gen_rtx_REG (HImode, 18));
+        operands[4] = gen_rtx_fmt_e (<any_extend2:CODE>, SImode, gen_rtx_REG (HImode, 26));
+      }
+    else
+      {
+        /* <any_extend:CODE>  = SIGN_EXTEND */
+        /* <any_extend2:CODE> = ZERO_EXTEND */
+
+        operands[1] = xop2;
+        operands[2] = xop1;
+        operands[3] = gen_rtx_ZERO_EXTEND (SImode, gen_rtx_REG (HImode, 18));
+        operands[4] = gen_rtx_SIGN_EXTEND (SImode, gen_rtx_REG (HImode, 26));
+      }
+  })
+
+(define_insn "*mulsi3_call"
+  [(set (reg:SI 22)
+        (mult:SI (reg:SI 22)
+                 (reg:SI 18)))
+   (clobber (reg:HI 26))]
   "AVR_HAVE_MUL"
-  "")
+  "%~call __mulsi3"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
 
 (define_insn "*mulhisi3_call"
-  [(set (reg:SI 22) 
+  [(set (reg:SI 22)
         (mult:SI (sign_extend:SI (reg:HI 18))
-                 (sign_extend:SI (reg:HI 20))))]
+                 (sign_extend:SI (reg:HI 26))))]
   "AVR_HAVE_MUL"
   "%~call __mulhisi3"
   [(set_attr "type" "xcall")
    (set_attr "cc" "clobber")])
 
 (define_insn "*umulhisi3_call"
-  [(set (reg:SI 22) 
+  [(set (reg:SI 22)
         (mult:SI (zero_extend:SI (reg:HI 18))
-                 (zero_extend:SI (reg:HI 20))))]
+                 (zero_extend:SI (reg:HI 26))))]
   "AVR_HAVE_MUL"
   "%~call __umulhisi3"
   [(set_attr "type" "xcall")
    (set_attr "cc" "clobber")])
 
+(define_insn "*usmulhisi3_call"
+  [(set (reg:SI 22)
+        (mult:SI (zero_extend:SI (reg:HI 18))
+                 (sign_extend:SI (reg:HI 26))))]
+  "AVR_HAVE_MUL"
+  "%~call __usmulhisi3"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+(define_insn "*muluhisi3_call"
+  [(set (reg:SI 22)
+        (mult:SI (zero_extend:SI (reg:HI 26))
+                 (reg:SI 18)))]
+  "AVR_HAVE_MUL"
+  "%~call __muluhisi3"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+(define_insn "*mulshisi3_call"
+  [(set (reg:SI 22)
+        (mult:SI (sign_extend:SI (reg:HI 26))
+                 (reg:SI 18)))]
+  "AVR_HAVE_MUL"
+  "%~call __mulshisi3"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+(define_insn "*mulohisi3_call"
+  [(set (reg:SI 22)
+        (mult:SI (not:SI (zero_extend:SI (not:HI (reg:HI 26))))
+                 (reg:SI 18)))]
+  "AVR_HAVE_MUL"
+  "%~call __mulohisi3"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
 ; / % / % / % / % / % / % / % / % / % / % / % / % / % / % / % / % / % / % / %
 ; divmod
 
@@ -2400,9 +2654,16 @@
 ;; xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x xx<---x
 ;; sign extend
 
+;; We keep combiner from inserting hard registers into the input of sign- and
+;; zero-extends.  A hard register in the input operand is not wanted because
+;; 32-bit multiply patterns clobber some hard registers and extends with a
+;; hard register that overlaps these clobbers won't be combined to a widening
+;; multiplication.  There is no need for combine to propagate hard registers,
+;; register allocation can do it just as well.
+
 (define_insn "extendqihi2"
   [(set (match_operand:HI 0 "register_operand" "=r,r")
-        (sign_extend:HI (match_operand:QI 1 "register_operand" "0,*r")))]
+        (sign_extend:HI (match_operand:QI 1 "combine_pseudo_register_operand" "0,*r")))]
   ""
   "@
 	clr %B0\;sbrc %0,7\;com %B0
@@ -2412,7 +2673,7 @@
 
 (define_insn "extendqisi2"
   [(set (match_operand:SI 0 "register_operand" "=r,r")
-        (sign_extend:SI (match_operand:QI 1 "register_operand" "0,*r")))]
+        (sign_extend:SI (match_operand:QI 1 "combine_pseudo_register_operand" "0,*r")))]
   ""
   "@
 	clr %B0\;sbrc %A0,7\;com %B0\;mov %C0,%B0\;mov %D0,%B0
@@ -2421,8 +2682,8 @@
    (set_attr "cc" "set_n,set_n")])
 
 (define_insn "extendhisi2"
-  [(set (match_operand:SI 0 "register_operand"               "=r,&r")
-        (sign_extend:SI (match_operand:HI 1 "register_operand" "0,*r")))]
+  [(set (match_operand:SI 0 "register_operand"                               "=r,r")
+        (sign_extend:SI (match_operand:HI 1 "combine_pseudo_register_operand" "0,*r")))]
   ""
   "@
 	clr %C0\;sbrc %B0,7\;com %C0\;mov %D0,%C0
@@ -2439,7 +2700,7 @@
 
 (define_insn_and_split "zero_extendqihi2"
   [(set (match_operand:HI 0 "register_operand" "=r")
-        (zero_extend:HI (match_operand:QI 1 "register_operand" "r")))]
+        (zero_extend:HI (match_operand:QI 1 "combine_pseudo_register_operand" "r")))]
   ""
   "#"
   "reload_completed"
@@ -2455,7 +2716,7 @@
 
 (define_insn_and_split "zero_extendqisi2"
   [(set (match_operand:SI 0 "register_operand" "=r")
-        (zero_extend:SI (match_operand:QI 1 "register_operand" "r")))]
+        (zero_extend:SI (match_operand:QI 1 "combine_pseudo_register_operand" "r")))]
   ""
   "#"
   "reload_completed"
@@ -2470,8 +2731,8 @@
 })
 
 (define_insn_and_split "zero_extendhisi2"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-        (zero_extend:SI (match_operand:HI 1 "register_operand" "r")))]
+  [(set (match_operand:SI 0 "register_operand"                               "=r")
+        (zero_extend:SI (match_operand:HI 1 "combine_pseudo_register_operand" "r")))]
   ""
   "#"
   "reload_completed"
diff --git a/gcc/config/avr/libgcc.S b/gcc/config/avr/libgcc.S
index 7f3feeb23a40..f6084a705182 100644
--- a/gcc/config/avr/libgcc.S
+++ b/gcc/config/avr/libgcc.S
@@ -72,10 +72,11 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 .endm
 
 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 /* Note: mulqi3, mulhi3 are open-coded on the enhanced core.  */
 #if !defined (__AVR_HAVE_MUL__)
 /*******************************************************
-               Multiplication  8 x 8
+    Multiplication  8 x 8  without MUL
 *******************************************************/
 #if defined (L_mulqi3)
 
@@ -83,9 +84,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define	r_arg1 	r24		/* multiplier */
 #define r_res	__tmp_reg__	/* result */
 
-	.global	__mulqi3
-	.func	__mulqi3
-__mulqi3:
+DEFUN __mulqi3
 	clr	r_res		; clear result
 __mulqi3_loop:
 	sbrc	r_arg1,0
@@ -97,18 +96,16 @@ __mulqi3_loop:
 __mulqi3_exit:	
 	mov	r_arg1,r_res	; result to return register
 	ret
+ENDF __mulqi3
 
 #undef r_arg2  
 #undef r_arg1  
 #undef r_res   
 	
-.endfunc
 #endif 	/* defined (L_mulqi3) */
 
 #if defined (L_mulqihi3)
-	.global	__mulqihi3
-	.func	__mulqihi3
-__mulqihi3:
+DEFUN __mulqihi3
 	clr	r25
 	sbrc	r24, 7
 	dec	r25
@@ -116,21 +113,19 @@ __mulqihi3:
 	sbrc	r22, 7
 	dec	r22
 	rjmp	__mulhi3
-	.endfunc
+ENDF __mulqihi3
 #endif /* defined (L_mulqihi3) */
 
 #if defined (L_umulqihi3)
-	.global	__umulqihi3
-	.func	__umulqihi3
-__umulqihi3:
+DEFUN __umulqihi3
 	clr	r25
 	clr	r23
 	rjmp	__mulhi3
-	.endfunc
+ENDF __umulqihi3
 #endif /* defined (L_umulqihi3) */
 
 /*******************************************************
-               Multiplication  16 x 16
+    Multiplication  16 x 16  without MUL
 *******************************************************/
 #if defined (L_mulhi3)
 #define	r_arg1L	r24		/* multiplier Low */
@@ -140,9 +135,7 @@ __umulqihi3:
 #define r_resL	__tmp_reg__	/* result Low */
 #define r_resH  r21		/* result High */
 
-	.global	__mulhi3
-	.func	__mulhi3
-__mulhi3:
+DEFUN __mulhi3
 	clr	r_resH		; clear result
 	clr	r_resL		; clear result
 __mulhi3_loop:
@@ -166,6 +159,7 @@ __mulhi3_exit:
 	mov	r_arg1H,r_resH	; result to return register
 	mov	r_arg1L,r_resL
 	ret
+ENDF __mulhi3
 
 #undef r_arg1L
 #undef r_arg1H
@@ -174,168 +168,51 @@ __mulhi3_exit:
 #undef r_resL 	
 #undef r_resH 
 
-.endfunc
 #endif /* defined (L_mulhi3) */
-#endif /* !defined (__AVR_HAVE_MUL__) */
 
 /*******************************************************
-      Widening Multiplication  32 = 16 x 16
+    Widening Multiplication  32 = 16 x 16  without MUL
 *******************************************************/
-                              
+
 #if defined (L_mulhisi3)
 DEFUN __mulhisi3
-#if defined (__AVR_HAVE_MUL__)
-
-;; r25:r22 = r19:r18 * r21:r20
-
-#define A0 18
-#define B0 20
-#define C0 22
-
-#define A1 A0+1
-#define B1 B0+1
-#define C1 C0+1
-#define C2 C0+2
-#define C3 C0+3
- 
-    ; C = (signed)A1 * (signed)B1
-    muls  A1, B1
-    movw  C2, R0
-
-    ; C += A0 * B0
-    mul   A0, B0
-    movw  C0, R0
-
-    ; C += (signed)A1 * B0
-    mulsu A1, B0
-    sbci  C3, 0
-    add   C1, R0
-    adc   C2, R1
-    clr   __zero_reg__
-    adc   C3, __zero_reg__
-
-    ; C += (signed)B1 * A0
-    mulsu B1, A0
-    sbci  C3, 0
-    XJMP  __xmulhisi3_exit
-
-#undef A0
-#undef A1
-#undef B0
-#undef B1
-#undef C0
-#undef C1
-#undef C2
-#undef C3
-
-#else /* !__AVR_HAVE_MUL__ */
 ;;; FIXME: This is dead code (noone calls it)
-	mov_l	r18, r24
-	mov_h	r19, r25
-	clr	r24
-	sbrc	r23, 7
-	dec	r24
-	mov	r25, r24
-	clr	r20
-	sbrc	r19, 7
-	dec	r20
-	mov	r21, r20
-	XJMP	__mulsi3
-#endif /* __AVR_HAVE_MUL__ */
+    mov_l   r18, r24
+    mov_h   r19, r25
+    clr     r24
+    sbrc    r23, 7
+    dec     r24
+    mov     r25, r24
+    clr     r20
+    sbrc    r19, 7
+    dec     r20
+    mov     r21, r20
+    XJMP    __mulsi3
 ENDF __mulhisi3
 #endif /* defined (L_mulhisi3) */
 
 #if defined (L_umulhisi3)
 DEFUN __umulhisi3
-#if defined (__AVR_HAVE_MUL__)
-
-;; r25:r22 = r19:r18 * r21:r20
-
-#define A0 18
-#define B0 20
-#define C0 22
-
-#define A1 A0+1
-#define B1 B0+1
-#define C1 C0+1
-#define C2 C0+2
-#define C3 C0+3
-
-    ; C = A1 * B1
-    mul   A1, B1
-    movw  C2, R0
-
-    ; C += A0 * B0
-    mul   A0, B0
-    movw  C0, R0
-
-    ; C += A1 * B0
-    mul   A1, B0
-    add   C1, R0
-    adc   C2, R1
-    clr   __zero_reg__
-    adc   C3, __zero_reg__
-
-    ; C += B1 * A0
-    mul   B1, A0
-    XJMP  __xmulhisi3_exit
-
-#undef A0
-#undef A1
-#undef B0
-#undef B1
-#undef C0
-#undef C1
-#undef C2
-#undef C3
-
-#else /* !__AVR_HAVE_MUL__ */
 ;;; FIXME: This is dead code (noone calls it)
-	mov_l	r18, r24
-	mov_h	r19, r25
-	clr	r24
-	clr	r25
-	clr	r20
-	clr	r21
-	XJMP	__mulsi3
-#endif /* __AVR_HAVE_MUL__ */
+    mov_l   r18, r24
+    mov_h   r19, r25
+    clr     r24
+    clr     r25
+    mov_l   r20, r24
+    mov_h   r21, r25
+    XJMP    __mulsi3
 ENDF __umulhisi3
 #endif /* defined (L_umulhisi3) */
 
-#if defined (L_xmulhisi3_exit)
-
-;;; Helper for __mulhisi3 resp. __umulhisi3.
-
-#define C0 22
-#define C1 C0+1
-#define C2 C0+2
-#define C3 C0+3
-
-DEFUN __xmulhisi3_exit
-    add   C1, R0
-    adc   C2, R1
-    clr   __zero_reg__
-    adc   C3, __zero_reg__
-    ret
-ENDF __xmulhisi3_exit
-
-#undef C0
-#undef C1
-#undef C2
-#undef C3
-
-#endif /* defined (L_xmulhisi3_exit) */
-
 #if defined (L_mulsi3)
 /*******************************************************
-               Multiplication  32 x 32
+    Multiplication  32 x 32  without MUL
 *******************************************************/
 #define r_arg1L  r22		/* multiplier Low */
 #define r_arg1H  r23
 #define	r_arg1HL r24
 #define	r_arg1HH r25		/* multiplier High */
 
-
 #define	r_arg2L  r18		/* multiplicand Low */
 #define	r_arg2H  r19	
 #define	r_arg2HL r20
@@ -346,43 +223,7 @@ ENDF __xmulhisi3_exit
 #define r_resHL	 r30
 #define r_resHH  r31		/* result High */
 
-	
-	.global	__mulsi3
-	.func	__mulsi3
-__mulsi3:
-#if defined (__AVR_HAVE_MUL__)
-	mul	r_arg1L, r_arg2L
-	movw	r_resL, r0
-	mul	r_arg1H, r_arg2H
-	movw	r_resHL, r0
-	mul	r_arg1HL, r_arg2L
-	add	r_resHL, r0
-	adc	r_resHH, r1
-	mul	r_arg1L, r_arg2HL
-	add	r_resHL, r0
-	adc	r_resHH, r1
-	mul	r_arg1HH, r_arg2L
-	add	r_resHH, r0
-	mul	r_arg1HL, r_arg2H
-	add	r_resHH, r0
-	mul	r_arg1H, r_arg2HL
-	add	r_resHH, r0
-	mul	r_arg1L, r_arg2HH
-	add	r_resHH, r0
-	clr	r_arg1HH	; use instead of __zero_reg__ to add carry
-	mul	r_arg1H, r_arg2L
-	add	r_resH, r0
-	adc	r_resHL, r1
-	adc	r_resHH, r_arg1HH ; add carry
-	mul	r_arg1L, r_arg2H
-	add	r_resH, r0
-	adc	r_resHL, r1
-	adc	r_resHH, r_arg1HH ; add carry
-	movw	r_arg1L, r_resL
-	movw	r_arg1HL, r_resHL
-	clr	r1		; __zero_reg__ clobbered by "mul"
-	ret
-#else
+DEFUN __mulsi3
 	clr	r_resHH		; clear result
 	clr	r_resHL		; clear result
 	clr	r_resH		; clear result
@@ -414,13 +255,13 @@ __mulsi3_exit:
 	mov_h	r_arg1H,r_resH
 	mov_l	r_arg1L,r_resL
 	ret
-#endif /* defined (__AVR_HAVE_MUL__) */
+ENDF __mulsi3
+
 #undef r_arg1L 
 #undef r_arg1H 
 #undef r_arg1HL
 #undef r_arg1HH
              
-             
 #undef r_arg2L 
 #undef r_arg2H 
 #undef r_arg2HL
@@ -431,9 +272,181 @@ __mulsi3_exit:
 #undef r_resHL 
 #undef r_resHH 
 
-.endfunc
 #endif /* defined (L_mulsi3) */
+
+#endif /* !defined (__AVR_HAVE_MUL__) */
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+#if defined (__AVR_HAVE_MUL__)    
+#define A0 26
+#define B0 18
+#define C0 22
+
+#define A1 A0+1
+
+#define B1 B0+1
+#define B2 B0+2
+#define B3 B0+3
+
+#define C1 C0+1
+#define C2 C0+2
+#define C3 C0+3
+
+/*******************************************************
+    Widening Multiplication  32 = 16 x 16
+*******************************************************/
+                              
+#if defined (L_mulhisi3)
+;;; R25:R22 = (signed long) R27:R26 * (signed long) R19:R18
+;;; C3:C0   = (signed long) A1:A0   * (signed long) B1:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __mulhisi3
+    XCALL   __umulhisi3
+    ;; Sign-extend B
+    tst     B1
+    brpl    1f
+    sub     C2, A0
+    sbc     C3, A1
+1:  ;; Sign-extend A
+    XJMP __usmulhisi3_tail
+ENDF __mulhisi3
+#endif /* L_mulhisi3 */
+
+#if defined (L_usmulhisi3)
+;;; R25:R22 = (signed long) R27:R26 * (unsigned long) R19:R18
+;;; C3:C0   = (signed long) A1:A0   * (unsigned long) B1:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __usmulhisi3
+    XCALL   __umulhisi3
+    ;; FALLTHRU
+ENDF __usmulhisi3
+
+DEFUN __usmulhisi3_tail
+    ;; Sign-extend A
+    sbrs    A1, 7
+    ret
+    sub     C2, B0
+    sbc     C3, B1
+    ret
+ENDF __usmulhisi3_tail
+#endif /* L_usmulhisi3 */
+
+#if defined (L_umulhisi3)
+;;; R25:R22 = (unsigned long) R27:R26 * (unsigned long) R19:R18
+;;; C3:C0   = (unsigned long) A1:A0   * (unsigned long) B1:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __umulhisi3
+    mul     A0, B0
+    movw    C0, r0
+    mul     A1, B1
+    movw    C2, r0
+    mul     A0, B1
+    rcall   1f
+    mul     A1, B0
+1:  add     C1, r0
+    adc     C2, r1
+    clr     __zero_reg__
+    adc     C3, __zero_reg__
+    ret
+ENDF __umulhisi3
+#endif /* L_umulhisi3 */
+
+/*******************************************************
+    Widening Multiplication  32 = 16 x 32
+*******************************************************/
+
+#if defined (L_mulshisi3)
+;;; R25:R22 = (signed long) R27:R26 * R21:R18
+;;; (C3:C0) = (signed long) A1:A0   * B3:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __mulshisi3
+#ifdef __AVR_HAVE_JMP_CALL__
+    ;; Some cores have problems skipping a 2-word instruction
+    tst     A1
+    brmi    __mulohisi3
+#else
+    sbrs    A1, 7
+#endif /* __AVR_HAVE_JMP_CALL__ */
+    XJMP    __muluhisi3
+    ;; FALLTHRU
+ENDF __mulshisi3
+    
+;;; R25:R22 = (one-extended long) R27:R26 * R21:R18
+;;; (C3:C0) = (one-extended long) A1:A0   * B3:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __mulohisi3
+    XCALL   __muluhisi3
+    ;; One-extend R27:R26 (A1:A0)
+    sub     C2, B0
+    sbc     C3, B1
+    ret
+ENDF __mulohisi3
+#endif /* L_mulshisi3 */
+
+#if defined (L_muluhisi3)
+;;; R25:R22 = (unsigned long) R27:R26 * R21:R18
+;;; (C3:C0) = (unsigned long) A1:A0   * B3:B0
+;;; Clobbers: __tmp_reg__
+DEFUN __muluhisi3
+    XCALL   __umulhisi3
+    mul     A0, B3
+    add     C3, r0
+    mul     A1, B2
+    add     C3, r0
+    mul     A0, B2
+    add     C2, r0
+    adc     C3, r1
+    clr     __zero_reg__
+    ret
+ENDF __muluhisi3
+#endif /* L_muluhisi3 */
+
+/*******************************************************
+    Multiplication  32 x 32
+*******************************************************/
+
+#if defined (L_mulsi3)
+;;; R25:R22 = R25:R22 * R21:R18
+;;; (C3:C0) = C3:C0   * B3:B0
+;;; Clobbers: R26, R27, __tmp_reg__
+DEFUN __mulsi3
+    movw    A0, C0
+    push    C2
+    push    C3
+    XCALL   __muluhisi3
+    pop     A1
+    pop     A0
+    ;; A1:A0 now contains the high word of A
+    mul     A0, B0
+    add     C2, r0
+    adc     C3, r1
+    mul     A0, B1
+    add     C3, r0
+    mul     A1, B0
+    add     C3, r0
+    clr     __zero_reg__
+    ret
+ENDF __mulsi3
+#endif /* L_mulsi3 */
+
+#undef A0
+#undef A1
+
+#undef B0
+#undef B1
+#undef B2
+#undef B3
+
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+
+#endif /* __AVR_HAVE_MUL__ */
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 	
+
 /*******************************************************
        Division 8 / 8 => (result + remainder)
 *******************************************************/
diff --git a/gcc/config/avr/predicates.md b/gcc/config/avr/predicates.md
index 6646cb546100..98262d54f55e 100755
--- a/gcc/config/avr/predicates.md
+++ b/gcc/config/avr/predicates.md
@@ -155,10 +155,34 @@
        (ior (match_test "register_operand (XEXP (op, 0), mode)")
             (match_test "CONSTANT_ADDRESS_P (XEXP (op, 0))"))))
 
+;; For some insns we must ensure that no hard register is inserted
+;; into their operands because the insns are split and the split
+;; involves hard registers.  Examples are the divmod insns, which are
+;; split into insns that represent implicit library calls.
+
 ;; True for register that is pseudo register.
 (define_predicate "pseudo_register_operand"
-  (and (match_code "reg")
-       (match_test "!HARD_REGISTER_P (op)")))
+  (and (match_operand 0 "register_operand")   ; whatever register_operand takes
+       (not (and (match_code "reg")           ; ... unless it is a REG
+                 (match_test "HARD_REGISTER_P (op)")))))  ; that is a hard reg
+
+;; True for an operand that is a pseudo register or a CONST_INT.
+(define_predicate "pseudo_register_or_const_int_operand"
+  (ior (match_operand 0 "const_int_operand")
+       (match_operand 0 "pseudo_register_operand")))
+
+;; We keep the combiner from inserting hard registers into the input of
+;; sign- and zero-extends.  A hard register in the input operand is not
+;; wanted because 32-bit multiply patterns clobber some hard registers, and
+;; an extend whose hard register overlaps these clobbers won't combine to a
+;; widening multiplication.  There is no need for combine to propagate or
+;; insert hard registers; register allocation can do it just as well.
+
+;; True for a pseudo register; during and after reload, any register_operand.
+(define_predicate "combine_pseudo_register_operand"
+  (ior (match_operand 0 "pseudo_register_operand")
+       (and (match_operand 0 "register_operand")
+            (match_test "reload_completed || reload_in_progress"))))
 
 ;; Return true if OP is a constant integer that is either
 ;; 8 or 16 or 24.
@@ -189,3 +213,18 @@
 (define_predicate "register_or_s9_operand"
   (ior (match_operand 0 "register_operand")
        (match_operand 0 "s9_operand")))
+
+;; Unsigned CONST_INT that fits in 16 bits, i.e. 0..65535.
+(define_predicate "u16_operand"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), 0, (1<<16)-1)")))
+
+;; Signed CONST_INT that fits in 16 bits, i.e. -32768..32767 (both inclusive).
+(define_predicate "s16_operand"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), -(1<<15), (1<<15)-1)")))
+
+;; One-extended 16-bit CONST_INT (all bits above bit 15 set), i.e. -65536..-1.
+(define_predicate "o16_operand"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), -(1<<16), -1)")))
diff --git a/gcc/config/avr/t-avr b/gcc/config/avr/t-avr
index a5357f0ecf6e..d79dd5a47b64 100644
--- a/gcc/config/avr/t-avr
+++ b/gcc/config/avr/t-avr
@@ -41,7 +41,9 @@ LIB1ASMFUNCS = \
 	_mulhi3 \
 	_mulhisi3 \
 	_umulhisi3 \
-	_xmulhisi3_exit \
+	_usmulhisi3 \
+	_muluhisi3 \
+	_mulshisi3 \
 	_mulsi3 \
 	_udivmodqi4 \
 	_divmodqi4 \
-- 
GitLab