From fe39eca4136bf2f9dc740c05e7957027736fc11b Mon Sep 17 00:00:00 2001
From: Robin Dapp <rdapp@ventanamicro.com>
Date: Thu, 13 Jul 2023 09:10:06 +0200
Subject: [PATCH] vect: Handle demoting FLOAT and promoting FIX_TRUNC.

The recent changes that allowed multi-step conversions for
"non-packing/unpacking", i.e. modifier == NONE targets included
promoting to-float and demoting to-int variants.  This patch
adds the missing demoting to-float and promoting to-int handling.

gcc/ChangeLog:

	* tree-vect-stmts.cc (vectorizable_conversion): Handle
	more demotion/promotion for modifier == NONE.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/conversions/vec-narrow-int64-float16.c: New test.
	* gcc.target/riscv/rvv/autovec/conversions/vec-widen-float16-int64.c: New test.
---
 .../conversions/vec-narrow-int64-float16.c    | 15 ++++
 .../conversions/vec-widen-float16-int64.c     | 15 ++++
 gcc/tree-vect-stmts.cc                        | 69 ++++++++++++++-----
 3 files changed, 82 insertions(+), 17 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/conversions/vec-narrow-int64-float16.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/conversions/vec-widen-float16-int64.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/conversions/vec-narrow-int64-float16.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/conversions/vec-narrow-int64-float16.c
new file mode 100644
index 000000000000..c24d66ae423f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/conversions/vec-narrow-int64-float16.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv64gcv_zvfh -mabi=lp64d --param=riscv-autovec-preference=scalable -fdump-tree-vect-details" } */
+
+/* This test ensures that we vectorize the conversion by having the vectorizer
+   create an intermediate type.  */
+
+#include <stdint-gcc.h>
+
+void convert (_Float16 *restrict dst, int64_t *restrict a, int n)
+{
+  for (int i = 0; i < n; i++)
+    dst[i] = (_Float16) (a[i] & 0x7fffffff);
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/conversions/vec-widen-float16-int64.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/conversions/vec-widen-float16-int64.c
new file mode 100644
index 000000000000..3fd1260f743c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/conversions/vec-widen-float16-int64.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv64gcv_zvfh -mabi=lp64d --param=riscv-autovec-preference=scalable -fno-trapping-math -fdump-tree-vect-details" } */
+
+/* This test ensures that we vectorize the conversion by having the vectorizer
+   create an intermediate type.  */
+
+#include <stdint-gcc.h>
+
+void convert (int64_t *restrict dst, _Float16 *restrict a, int n)
+{
+  for (int i = 0; i < n; i++)
+    dst[i] = (int64_t) a[i];
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index ed28fbdced33..15e12f247de1 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -5190,31 +5190,66 @@ vectorizable_conversion (vec_info *vinfo,
 	break;
       }
 
-      /* For conversions between float and smaller integer types try whether we
-	 can use intermediate signed integer types to support the
+      /* For conversions between float and integer types try whether
+	 we can use intermediate signed integer types to support the
 	 conversion.  */
-      if ((code == FLOAT_EXPR
-	   && GET_MODE_SIZE (lhs_mode) > GET_MODE_SIZE (rhs_mode))
-	  || (code == FIX_TRUNC_EXPR
-	      && GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE (lhs_mode)
-	      && !flag_trapping_math))
+      if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode)
+	  && (code == FLOAT_EXPR ||
+	      (code == FIX_TRUNC_EXPR && !flag_trapping_math)))
 	{
+	  bool demotion = GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE (lhs_mode);
 	  bool float_expr_p = code == FLOAT_EXPR;
-	  scalar_mode imode = float_expr_p ? rhs_mode : lhs_mode;
-	  fltsz = GET_MODE_SIZE (float_expr_p ? lhs_mode : rhs_mode);
+	  unsigned short target_size;
+	  scalar_mode intermediate_mode;
+	  if (demotion)
+	    {
+	      intermediate_mode = lhs_mode;
+	      target_size = GET_MODE_SIZE (rhs_mode);
+	    }
+	  else
+	    {
+	      target_size = GET_MODE_SIZE (lhs_mode);
+	      if (!int_mode_for_size
+		  (GET_MODE_BITSIZE (rhs_mode), 0).exists (&intermediate_mode))
+		goto unsupported;
+	    }
 	  code1 = float_expr_p ? code : NOP_EXPR;
 	  codecvt1 = float_expr_p ? NOP_EXPR : code;
-	  FOR_EACH_2XWIDER_MODE (rhs_mode_iter, imode)
+	  opt_scalar_mode mode_iter;
+	  FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode)
 	    {
-	      imode = rhs_mode_iter.require ();
-	      if (GET_MODE_SIZE (imode) > fltsz)
+	      intermediate_mode = mode_iter.require ();
+
+	      if (GET_MODE_SIZE (intermediate_mode) > target_size)
 		break;
 
-	      cvt_type
-		= build_nonstandard_integer_type (GET_MODE_BITSIZE (imode),
-						  0);
-	      cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type,
-						      slp_node);
+	      scalar_mode cvt_mode;
+	      if (!int_mode_for_size
+		  (GET_MODE_BITSIZE (intermediate_mode), 0).exists (&cvt_mode))
+		break;
+
+	      cvt_type = build_nonstandard_integer_type
+		(GET_MODE_BITSIZE (cvt_mode), 0);
+
+	      /* Check if the intermediate type can hold OP0's range.
+		 When converting from float to integer this is not necessary
+		 because values that do not fit the (smaller) target type are
+		 unspecified anyway.  */
+	      if (demotion && float_expr_p)
+		{
+		  wide_int op_min_value, op_max_value;
+		  if (!vect_get_range_info (op0, &op_min_value, &op_max_value))
+		    break;
+
+		  if (cvt_type == NULL_TREE
+		      || (wi::min_precision (op_max_value, SIGNED)
+			  > TYPE_PRECISION (cvt_type))
+		      || (wi::min_precision (op_min_value, SIGNED)
+			  > TYPE_PRECISION (cvt_type)))
+		    continue;
+		}
+
+	      cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type, slp_node);
 	      /* This should only happened for SLP as long as loop vectorizer
 		 only supports same-sized vector.  */
 	      if (cvt_type == NULL_TREE
-- 
GitLab