From 04c9ea12e0f579e0f974ffe6a979376ddafbc0a1 Mon Sep 17 00:00:00 2001
From: Georg-Johann Lay <avr@gjlay.de>
Date: Wed, 27 Jul 2011 16:39:13 +0000
Subject: [PATCH] re PR target/49313 (Inefficient libgcc implementations for
 avr)

	PR target/49313
	* config/avr/libgcc.S (__ffshi2): Don't skip 2-word instruction.
	(__ctzsi2): Result for 0 may be undefined.
	(__ctzhi2): Result for 0 may be undefined.
	(__popcounthi2): Don't clobber r30. Use __popcounthi2_tail.
	(__popcountsi2): Ditto. And don't clobber r26.
	(__popcountdi2): Ditto. And don't clobber r27.
	* config/avr/avr.md (UNSPEC_COPYSIGN): New c_enum.
	(parityhi2): New expand.
	(paritysi2): New expand.
	(popcounthi2): New expand.
	(popcountsi2): New expand.
	(clzhi2): New expand.
	(clzsi2): New expand.
	(ctzhi2): New expand.
	(ctzsi2): New expand.
	(ffshi2): New expand.
	(ffssi2): New expand.
	(copysignsf3): New insn.
	(bswapsi2): New expand.
	(*parityhi2.libgcc): New insn.
	(*parityqihi2.libgcc): New insn.
	(*paritysihi2.libgcc): New insn.
	(*popcounthi2.libgcc): New insn.
	(*popcountsi2.libgcc): New insn.
	(*popcountqi2.libgcc): New insn.
	(*popcountqihi2.libgcc): New insn-and-split.
	(*clzhi2.libgcc): New insn.
	(*clzsihi2.libgcc): New insn.
	(*ctzhi2.libgcc): New insn.
	(*ctzsihi2.libgcc): New insn.
	(*ffshi2.libgcc): New insn.
	(*ffssihi2.libgcc): New insn.
	(*bswapsi2.libgcc): New insn.

From-SVN: r176835
---
 gcc/ChangeLog           |  37 ++++++
 gcc/config/avr/avr.md   | 270 ++++++++++++++++++++++++++++++++++++++++
 gcc/config/avr/libgcc.S |  47 ++++---
 3 files changed, 335 insertions(+), 19 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 43e0364b4e70..2ace76c120c3 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,40 @@
+2011-07-27  Georg-Johann Lay  <avr@gjlay.de>
+	
+	PR target/49313
+	* config/avr/libgcc.S (__ffshi2): Don't skip 2-word instruction.
+	(__ctzsi2): Result for 0 may be undefined.
+	(__ctzhi2): Result for 0 may be undefined.
+	(__popcounthi2): Don't clobber r30. Use __popcounthi2_tail.
+	(__popcountsi2): Ditto. And don't clobber r26.
+	(__popcountdi2): Ditto. And don't clobber r27.
+	* config/avr/avr.md (UNSPEC_COPYSIGN): New c_enum.
+	(parityhi2): New expand.
+	(paritysi2): New expand.
+	(popcounthi2): New expand.
+	(popcountsi2): New expand.
+	(clzhi2): New expand.
+	(clzsi2): New expand.
+	(ctzhi2): New expand.
+	(ctzsi2): New expand.
+	(ffshi2): New expand.
+	(ffssi2): New expand.
+	(copysignsf3): New insn.
+	(bswapsi2): New expand.
+	(*parityhi2.libgcc): New insn.
+	(*parityqihi2.libgcc): New insn.
+	(*paritysihi2.libgcc): New insn.
+	(*popcounthi2.libgcc): New insn.
+	(*popcountsi2.libgcc): New insn.
+	(*popcountqi2.libgcc): New insn.
+	(*popcountqihi2.libgcc): New insn-and-split.
+	(*clzhi2.libgcc): New insn.
+	(*clzsihi2.libgcc): New insn.
+	(*ctzhi2.libgcc): New insn.
+	(*ctzsihi2.libgcc): New insn.
+	(*ffshi2.libgcc): New insn.
+	(*ffssihi2.libgcc): New insn.
+	(*bswapsi2.libgcc): New insn.
+
 2011-07-27  Uros Bizjak  <ubizjak@gmail.com>
 
 	* config/i386/i386.c (ix86_expand_move): Do not explicitly check
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index 250133e5ebbb..66c3db279c5c 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -55,6 +55,7 @@
    UNSPEC_FMUL
    UNSPEC_FMULS
    UNSPEC_FMULSU
+   UNSPEC_COPYSIGN
    ])
 
 (define_c_enum "unspecv"
@@ -3680,6 +3681,275 @@
   [(set_attr "length" "9")
    (set_attr "cc" "clobber")])
 
+
+;; Parity
+
+(define_expand "parityhi2"
+  [(set (reg:HI 24)
+        (match_operand:HI 1 "register_operand" ""))
+   (set (reg:HI 24)
+        (parity:HI (reg:HI 24)))
+   (set (match_operand:HI 0 "register_operand" "")
+        (reg:HI 24))]
+  ""
+  "")
+
+(define_expand "paritysi2"
+  [(set (reg:SI 22)
+        (match_operand:SI 1 "register_operand" ""))
+   (set (reg:HI 24)
+        (parity:HI (reg:SI 22)))
+   (set (match_operand:SI 0 "register_operand" "")
+        (zero_extend:SI (reg:HI 24)))]
+  ""
+  "")
+
+(define_insn "*parityhi2.libgcc"
+  [(set (reg:HI 24)
+        (parity:HI (reg:HI 24)))]
+  ""
+  "%~call __parityhi2"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+(define_insn "*parityqihi2.libgcc"
+  [(set (reg:HI 24)
+        (parity:HI (reg:QI 24)))]
+  ""
+  "%~call __parityqi2"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+(define_insn "*paritysihi2.libgcc"
+  [(set (reg:HI 24)
+        (parity:HI (reg:SI 22)))]
+  ""
+  "%~call __paritysi2"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+
+;; Popcount
+
+(define_expand "popcounthi2"
+  [(set (reg:HI 24)
+        (match_operand:HI 1 "register_operand" ""))
+   (set (reg:HI 24)
+        (popcount:HI (reg:HI 24)))
+   (set (match_operand:HI 0 "register_operand" "")
+        (reg:HI 24))]
+  ""
+  "")
+
+(define_expand "popcountsi2"
+  [(set (reg:SI 22)
+        (match_operand:SI 1 "register_operand" ""))
+   (set (reg:HI 24)
+        (popcount:HI (reg:SI 22)))
+   (set (match_operand:SI 0 "register_operand" "")
+        (zero_extend:SI (reg:HI 24)))]
+  ""
+  "")
+
+(define_insn "*popcounthi2.libgcc"
+  [(set (reg:HI 24)
+        (popcount:HI (reg:HI 24)))]
+  ""
+  "%~call __popcounthi2"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+(define_insn "*popcountsi2.libgcc"
+  [(set (reg:HI 24)
+        (popcount:HI (reg:SI 22)))]
+  ""
+  "%~call __popcountsi2"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+(define_insn "*popcountqi2.libgcc"
+  [(set (reg:QI 24)
+        (popcount:QI (reg:QI 24)))]
+  ""
+  "%~call __popcountqi2"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+(define_insn_and_split "*popcountqihi2.libgcc"
+  [(set (reg:HI 24)
+        (popcount:HI (reg:QI 24)))]
+  ""
+  "#"
+  ""
+  [(set (reg:QI 24)
+        (popcount:QI (reg:QI 24)))
+   (set (reg:QI 25)
+        (const_int 0))]
+  "")
+
+;; Count Leading Zeros
+
+(define_expand "clzhi2"
+  [(set (reg:HI 24)
+        (match_operand:HI 1 "register_operand" ""))
+   (parallel [(set (reg:HI 24)
+                   (clz:HI (reg:HI 24)))
+              (clobber (reg:QI 26))])
+   (set (match_operand:HI 0 "register_operand" "")
+        (reg:HI 24))]
+  ""
+  "")
+
+(define_expand "clzsi2"
+  [(set (reg:SI 22)
+        (match_operand:SI 1 "register_operand" ""))
+   (parallel [(set (reg:HI 24)
+                   (clz:HI (reg:SI 22)))
+              (clobber (reg:QI 26))])
+   (set (match_operand:SI 0 "register_operand" "")
+        (zero_extend:SI (reg:HI 24)))]
+  ""
+  "")
+
+(define_insn "*clzhi2.libgcc"
+  [(parallel [(set (reg:HI 24)
+                   (clz:HI (reg:HI 24)))
+              (clobber (reg:QI 26))])]
+  ""
+  "%~call __clzhi2"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+(define_insn "*clzsihi2.libgcc"
+  [(parallel [(set (reg:HI 24)
+                   (clz:HI (reg:SI 22)))
+              (clobber (reg:QI 26))])]
+  ""
+  "%~call __clzsi2"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+;; Count Trailing Zeros
+
+(define_expand "ctzhi2"
+  [(set (reg:HI 24)
+        (match_operand:HI 1 "register_operand" ""))
+   (parallel [(set (reg:HI 24)
+                   (ctz:HI (reg:HI 24)))
+              (clobber (reg:QI 26))])
+   (set (match_operand:HI 0 "register_operand" "")
+        (reg:HI 24))]
+  ""
+  "")
+
+(define_expand "ctzsi2"
+  [(set (reg:SI 22)
+        (match_operand:SI 1 "register_operand" ""))
+   (parallel [(set (reg:HI 24)
+                   (ctz:HI (reg:SI 22)))
+              (clobber (reg:QI 22))
+              (clobber (reg:QI 26))])
+   (set (match_operand:SI 0 "register_operand" "")
+        (zero_extend:SI (reg:HI 24)))]
+  ""
+  "")
+
+(define_insn "*ctzhi2.libgcc"
+  [(set (reg:HI 24)
+        (ctz:HI (reg:HI 24)))
+   (clobber (reg:QI 26))]
+  ""
+  "%~call __ctzhi2"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+(define_insn "*ctzsihi2.libgcc"
+  [(set (reg:HI 24)
+        (ctz:HI (reg:SI 22)))
+   (clobber (reg:QI 22))
+   (clobber (reg:QI 26))]
+  ""
+  "%~call __ctzsi2"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+;; Find First Set
+
+(define_expand "ffshi2"
+  [(set (reg:HI 24)
+        (match_operand:HI 1 "register_operand" ""))
+   (parallel [(set (reg:HI 24)
+                   (ffs:HI (reg:HI 24)))
+              (clobber (reg:QI 26))])
+   (set (match_operand:HI 0 "register_operand" "")
+        (reg:HI 24))]
+  ""
+  "")
+
+(define_expand "ffssi2"
+  [(set (reg:SI 22)
+        (match_operand:SI 1 "register_operand" ""))
+   (parallel [(set (reg:HI 24)
+                   (ffs:HI (reg:SI 22)))
+              (clobber (reg:QI 22))
+              (clobber (reg:QI 26))])
+   (set (match_operand:SI 0 "register_operand" "")
+        (zero_extend:SI (reg:HI 24)))]
+  ""
+  "")
+
+(define_insn "*ffshi2.libgcc"
+  [(parallel [(set (reg:HI 24)
+                   (ffs:HI (reg:HI 24)))
+              (clobber (reg:QI 26))])]
+  ""
+  "%~call __ffshi2"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+(define_insn "*ffssihi2.libgcc"
+  [(parallel [(set (reg:HI 24)
+                   (ffs:HI (reg:SI 22)))
+              (clobber (reg:QI 22))
+              (clobber (reg:QI 26))])]
+  ""
+  "%~call __ffssi2"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+;; Copysign
+
+(define_insn "copysignsf3"
+  [(set (match_operand:SF 0 "register_operand"             "=r")
+        (unspec:SF [(match_operand:SF 1 "register_operand"  "0")
+                    (match_operand:SF 2 "register_operand"  "r")]
+                   UNSPEC_COPYSIGN))]
+  ""
+  "bst %D2,7\;bld %D0,7"
+  [(set_attr "length" "2")
+   (set_attr "cc" "none")])
+  
+;; Swap Bytes (change byte-endianness)
+
+(define_expand "bswapsi2"
+  [(set (reg:SI 22)
+        (match_operand:SI 1 "register_operand" ""))
+   (set (reg:SI 22)
+        (bswap:SI (reg:SI 22)))
+   (set (match_operand:SI 0 "register_operand" "")
+        (reg:SI 22))]
+  ""
+  "")
+
+(define_insn "*bswapsi2.libgcc"
+  [(set (reg:SI 22)
+        (bswap:SI (reg:SI 22)))]
+  ""
+  "%~call __bswapsi2"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+
 ;; CPU instructions
 
 ;; NOP taking 1 or 2 Ticks 
diff --git a/gcc/config/avr/libgcc.S b/gcc/config/avr/libgcc.S
index c2459d00e6b3..7f3feeb23a40 100644
--- a/gcc/config/avr/libgcc.S
+++ b/gcc/config/avr/libgcc.S
@@ -1061,9 +1061,15 @@ ENDF __ffssi2
 ;; clobbers: r26
 DEFUN __ffshi2
     clr  r26
+#ifdef __AVR_HAVE_JMP_CALL__
+    ;; Some cores have problems skipping a 2-word instruction
+    tst  r24
+    breq 2f
+#else
     cpse r24, __zero_reg__
+#endif /* __AVR_HAVE_JMP_CALL__ */
 1:  XJMP __loop_ffsqi2
-    ldi  r26, 8
+2:  ldi  r26, 8
     or   r24, r25
     brne 1b
     ret
@@ -1093,12 +1099,12 @@ ENDF __loop_ffsqi2
 #if defined (L_ctzsi2)
 ;; count trailing zeros
 ;; r25:r24 = ctz32 (r25:r22)
-;; ctz(0) = 32
+;; clobbers: r26, r22
+;; ctz(0) = 255
+;; Note that ctz(0) is undefined for GCC
 DEFUN __ctzsi2
     XCALL __ffssi2
     dec  r24
-    sbrc r24, 7
-    ldi  r24, 32
     ret
 ENDF __ctzsi2
 #endif /* defined (L_ctzsi2) */
@@ -1106,12 +1112,12 @@ ENDF __ctzsi2
 #if defined (L_ctzhi2)
 ;; count trailing zeros
 ;; r25:r24 = ctz16 (r25:r24)
-;; ctz(0) = 16
+;; clobbers: r26
+;; ctz(0) = 255
+;; Note that ctz(0) is undefined for GCC
 DEFUN __ctzhi2
     XCALL __ffshi2
     dec  r24
-    sbrc r24, 7
-    ldi  r24, 16
     ret
 ENDF __ctzhi2
 #endif /* defined (L_ctzhi2) */
@@ -1245,47 +1251,50 @@ ENDF __parityqi2
 #if defined (L_popcounthi2)
 ;; population count
 ;; r25:r24 = popcount16 (r25:r24)
-;; clobbers: r30, __tmp_reg__
+;; clobbers: __tmp_reg__
 DEFUN __popcounthi2
     XCALL __popcountqi2
-    mov  r30, r24
+    push r24
     mov  r24, r25
     XCALL __popcountqi2
-    add  r24, r30
     clr  r25
-    ret
+    ;; FALLTHRU
 ENDF __popcounthi2
+
+DEFUN __popcounthi2_tail
+    pop   __tmp_reg__
+    add   r24, __tmp_reg__
+    ret
+ENDF __popcounthi2_tail
 #endif /* defined (L_popcounthi2) */
 
 #if defined (L_popcountsi2)
 ;; population count
 ;; r25:r24 = popcount32 (r25:r22)
-;; clobbers: r26, r30, __tmp_reg__
+;; clobbers: __tmp_reg__
 DEFUN __popcountsi2
     XCALL __popcounthi2
-    mov   r26, r24
+    push  r24
     mov_l r24, r22
     mov_h r25, r23
     XCALL __popcounthi2
-    add   r24, r26
-    ret
+    XJMP  __popcounthi2_tail
 ENDF __popcountsi2
 #endif /* defined (L_popcountsi2) */
 
 #if defined (L_popcountdi2)
 ;; population count
 ;; r25:r24 = popcount64 (r25:r18)
-;; clobbers: r22, r23, r26, r27, r30, __tmp_reg__
+;; clobbers: r22, r23, __tmp_reg__
 DEFUN __popcountdi2
     XCALL __popcountsi2
-    mov   r27, r24
+    push  r24
     mov_l r22, r18
     mov_h r23, r19
     mov_l r24, r20
     mov_h r25, r21
     XCALL __popcountsi2
-    add   r24, r27
-    ret
+    XJMP  __popcounthi2_tail
 ENDF __popcountdi2
 #endif /* defined (L_popcountdi2) */
 
-- 
GitLab