From 308d688bebdd1f29cb82c7d0e09b43e57c581659 Mon Sep 17 00:00:00 2001
From: Roger Sayle <roger@nextmovesoftware.com>
Date: Sun, 12 Dec 2021 18:03:03 +0100
Subject: [PATCH] nvptx: Add -misa=sm_75 and -misa=sm_80

Add new target macros TARGET_SM75 and TARGET_SM80.  Add support for
__builtin_tanhf, HFmode exp2/tanh and also for HFmode min/max, controlled by
TARGET_SM75 and TARGET_SM80 respectively.

The following has been tested on nvptx-none, hosted on x86_64-pc-linux-gnu
with a "make" and "make -k check" with no new failures.

gcc/ChangeLog:

	* config/nvptx/nvptx-opts.h (ptx_isa): PTX_ISA_SM75 and PTX_ISA_SM80
	ISA levels.
	* config/nvptx/nvptx.opt: Add sm_75 and sm_80 to -misa.
	* config/nvptx/nvptx.h (TARGET_SM75, TARGET_SM80):
	New helper macros to conditionalize functionality on target ISA.
	* config/nvptx/nvptx-c.c (nvptx_cpu_cpp_builtins): Add __PTX_SM__
	support for the new ISA levels.
	* config/nvptx/nvptx.c (nvptx_file_start): Add support for TARGET_SM75
	and TARGET_SM80.
	* config/nvptx/nvptx.md (define_c_enum "unspec"): New UNSPEC_TANH.
	(define_mode_iterator HSFM): New iterator for HFmode and SFmode.
	(exp2hf2): New define_insn controlled by TARGET_SM75.
	(tanh<mode>2): New define_insn controlled by TARGET_SM75.
	(sminhf3, smaxhf3): New define_isnns controlled by TARGET_SM80.

gcc/testsuite/ChangeLog:

	* gcc.target/nvptx/float16-2.c: New test case.
	* gcc.target/nvptx/tanh-1.c: New test case.
---
 gcc/config/nvptx/nvptx-c.c                 |  6 +++-
 gcc/config/nvptx/nvptx-opts.h              |  4 ++-
 gcc/config/nvptx/nvptx.c                   |  6 +++-
 gcc/config/nvptx/nvptx.h                   |  2 ++
 gcc/config/nvptx/nvptx.md                  | 32 ++++++++++++++++++++++
 gcc/config/nvptx/nvptx.opt                 |  6 ++++
 gcc/testsuite/gcc.target/nvptx/float16-2.c | 20 ++++++++++++++
 gcc/testsuite/gcc.target/nvptx/tanh-1.c    |  9 ++++++
 8 files changed, 82 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/nvptx/float16-2.c
 create mode 100644 gcc/testsuite/gcc.target/nvptx/tanh-1.c

diff --git a/gcc/config/nvptx/nvptx-c.c b/gcc/config/nvptx/nvptx-c.c
index 7efdf705fa6f..d51ad00582a7 100644
--- a/gcc/config/nvptx/nvptx-c.c
+++ b/gcc/config/nvptx/nvptx-c.c
@@ -39,7 +39,11 @@ nvptx_cpu_cpp_builtins (void)
     cpp_define (parse_in, "__nvptx_softstack__");
   if (TARGET_UNIFORM_SIMT)
     cpp_define (parse_in,"__nvptx_unisimt__");
-  if (TARGET_SM53)
+  if (TARGET_SM80)
+    cpp_define (parse_in, "__PTX_SM__=800");
+  else if (TARGET_SM75)
+    cpp_define (parse_in, "__PTX_SM__=750");
+  else if (TARGET_SM53)
     cpp_define (parse_in, "__PTX_SM__=530");
   else if (TARGET_SM35)
     cpp_define (parse_in, "__PTX_SM__=350");
diff --git a/gcc/config/nvptx/nvptx-opts.h b/gcc/config/nvptx/nvptx-opts.h
index 396fe8711633..7b6ecd42fedf 100644
--- a/gcc/config/nvptx/nvptx-opts.h
+++ b/gcc/config/nvptx/nvptx-opts.h
@@ -24,7 +24,9 @@ enum ptx_isa
 {
   PTX_ISA_SM30,
   PTX_ISA_SM35,
-  PTX_ISA_SM53
+  PTX_ISA_SM53,
+  PTX_ISA_SM75,
+  PTX_ISA_SM80
 };
 
 enum ptx_version
diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index 51eef2b45b2a..ff44d9fdbef9 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -5410,7 +5410,11 @@ nvptx_file_start (void)
     fputs ("\t.version\t6.3\n", asm_out_file);
   else
     fputs ("\t.version\t3.1\n", asm_out_file);
-  if (TARGET_SM53)
+  if (TARGET_SM80)
+    fputs ("\t.target\tsm_80\n", asm_out_file);
+  else if (TARGET_SM75)
+    fputs ("\t.target\tsm_75\n", asm_out_file);
+  else if (TARGET_SM53)
     fputs ("\t.target\tsm_53\n", asm_out_file);
   else if (TARGET_SM35)
     fputs ("\t.target\tsm_35\n", asm_out_file);
diff --git a/gcc/config/nvptx/nvptx.h b/gcc/config/nvptx/nvptx.h
index 92fd9d3b6d12..a25cccb42d46 100644
--- a/gcc/config/nvptx/nvptx.h
+++ b/gcc/config/nvptx/nvptx.h
@@ -88,6 +88,8 @@
 
 #define TARGET_SM35 (ptx_isa_option >= PTX_ISA_SM35)
 #define TARGET_SM53 (ptx_isa_option >= PTX_ISA_SM53)
+#define TARGET_SM75 (ptx_isa_option >= PTX_ISA_SM75)
+#define TARGET_SM80 (ptx_isa_option >= PTX_ISA_SM80)
 
 #define TARGET_PTX_6_3 (ptx_version_option >= PTX_VERSION_6_3)
 #define TARGET_PTX_7_0 (ptx_version_option >= PTX_VERSION_7_0)
diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
index da4ac8f3237b..6599b3a928af 100644
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -26,6 +26,7 @@
    UNSPEC_EXP2
    UNSPEC_SIN
    UNSPEC_COS
+   UNSPEC_TANH
 
    UNSPEC_FPINT_FLOOR
    UNSPEC_FPINT_BTRUNC
@@ -196,6 +197,7 @@
 (define_mode_iterator QHIM [QI HI])
 (define_mode_iterator QHSIM [QI HI SI])
 (define_mode_iterator SDFM [SF DF])
+(define_mode_iterator HSFM [HF SF])
 (define_mode_iterator SDCM [SC DC])
 (define_mode_iterator BITS [SI SF])
 (define_mode_iterator BITD [DI DF])
@@ -1143,6 +1145,36 @@
   "TARGET_SM53"
   "%.\\tmul.f16\\t%0, %1, %2;")
 
+(define_insn "exp2hf2"
+  [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
+	(unspec:HF [(match_operand:HF 1 "nvptx_register_operand" "R")]
+		   UNSPEC_EXP2))]
+  "TARGET_SM75 && flag_unsafe_math_optimizations"
+  "%.\\tex2.approx.f16\\t%0, %1;")
+
+(define_insn "tanh<mode>2"
+  [(set (match_operand:HSFM 0 "nvptx_register_operand" "=R")
+	(unspec:HSFM [(match_operand:HSFM 1 "nvptx_register_operand" "R")]
+		     UNSPEC_TANH))]
+  "TARGET_SM75 && flag_unsafe_math_optimizations"
+  "%.\\ttanh.approx%t0\\t%0, %1;")
+
+;; HFmode floating point arithmetic.
+
+(define_insn "sminhf3"
+  [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
+	(smin:HF (match_operand:HF 1 "nvptx_register_operand" "R")
+		 (match_operand:HF 2 "nvptx_register_operand" "R")))]
+  "TARGET_SM80"
+  "%.\\tmin.f16\\t%0, %1, %2;")
+
+(define_insn "smaxhf3"
+  [(set (match_operand:HF 0 "nvptx_register_operand" "=R")
+	(smax:HF (match_operand:HF 1 "nvptx_register_operand" "R")
+		 (match_operand:HF 2 "nvptx_register_operand" "R")))]
+  "TARGET_SM80"
+  "%.\\tmax.f16\\t%0, %1, %2;")
+
 ;; Conversions involving floating point
 
 (define_insn "extendsfdf2"
diff --git a/gcc/config/nvptx/nvptx.opt b/gcc/config/nvptx/nvptx.opt
index 04b45da9249f..1d88ef18d048 100644
--- a/gcc/config/nvptx/nvptx.opt
+++ b/gcc/config/nvptx/nvptx.opt
@@ -64,6 +64,12 @@ Enum(ptx_isa) String(sm_35) Value(PTX_ISA_SM35)
 EnumValue
 Enum(ptx_isa) String(sm_53) Value(PTX_ISA_SM53)
 
+EnumValue
+Enum(ptx_isa) String(sm_75) Value(PTX_ISA_SM75)
+
+EnumValue
+Enum(ptx_isa) String(sm_80) Value(PTX_ISA_SM80)
+
 ; Default needs to be in sync with default in ASM_SPEC in nvptx.h.
 misa=
 Target RejectNegative ToLower Joined Enum(ptx_isa) Var(ptx_isa_option) Init(PTX_ISA_SM35)
diff --git a/gcc/testsuite/gcc.target/nvptx/float16-2.c b/gcc/testsuite/gcc.target/nvptx/float16-2.c
new file mode 100644
index 000000000000..5748a9c7a972
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/float16-2.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ffast-math -misa=sm_80 -mptx=7.0" } */
+
+_Float16 x;
+_Float16 y;
+_Float16 t;
+
+void foo()
+{
+  t = x < y ? x : y;
+}
+
+void bar()
+{
+  t = x > y ? x : y;
+}
+
+/* { dg-final { scan-assembler "min.f16" } } */
+/* { dg-final { scan-assembler "max.f16" } } */
+
diff --git a/gcc/testsuite/gcc.target/nvptx/tanh-1.c b/gcc/testsuite/gcc.target/nvptx/tanh-1.c
new file mode 100644
index 000000000000..56a0e5a85784
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/tanh-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ffast-math -misa=sm_75 -mptx=7.0" } */
+
+float foo(float x)
+{
+  return __builtin_tanhf(x);
+}
+
+/* { dg-final { scan-assembler "tanh.approx.f32" } } */
-- 
GitLab